Skip to content
Snippets Groups Projects
Select Git revision
  • d3f6f569574fe3317d326b48c55301714668d7e7
  • master default protected
  • RAP-699_update-readme
  • gec-134-use-ska-sofia-fork
  • gec-115-add-facet-functions
  • add-coverage-badge
  • releases/1.6
  • releases/1.5
  • rap-457-implement-ci-v2
  • test_heroku
  • action-python-publish
  • v1.7.1
  • v1.7.0
  • v1.6.2
  • v1.6.1
  • v1.6.post1
  • v1.6
  • v1.5.1
  • v1.5.0
  • v1.4.11
  • v1.4.10
  • v1.4.9
  • v1.4.8
  • v1.4.7
  • v1.4.6
  • v1.4.5
  • v1.4.4
  • v1.4.3
  • v1.4.2
  • v1.4.1
  • v1.4.0
31 results

group.py

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    group.py 19.99 KiB
    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    #
    # This operation implements grouping of sources into patches
    #
    # This program is free software; you can redistribute it and/or modify
    # it under the terms of the GNU General Public License as published by
    # the Free Software Foundation; either version 2 of the License, or
    # (at your option) any later version.
    #
    # This program is distributed in the hope that it will be useful,
    # but WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    # GNU General Public License for more details.
    #
    # You should have received a copy of the GNU General Public License
    # along with this program; if not, write to the Free Software
    # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    
    import logging
    
    log = logging.getLogger('LSMTool.GROUP')
    log.debug('Loading GROUP module.')
    
    
    def run(step, parset, LSM):
    
        outFile = parset.getString('.'.join(["LSMTool.Steps", step, "OutFile"]), '' )
        algorithm = parset.getString('.'.join(["LSMTool.Steps", step, "Algorithm"]), 'single' )
        root = parset.getString('.'.join(["LSMTool.Steps", step, "Root"]), 'Patch' )
        targetFlux = parset.getString('.'.join(["LSMTool.Steps", step, "TargetFlux"]), '1.0 Jy' )
        numClusters = parset.getInt('.'.join(["LSMTool.Steps", step, "NumClusters"]), 100 )
        threshold = parset.getString('.'.join(["LSMTool.Steps", step, "Threshold"]), '1.0 Jy' )
        FWHM = parset.getString('.'.join(["LSMTool.Steps", step, "FWHM"]), None )
        applyBeam = parset.getBool('.'.join(["LSMTool.Steps", step, "ApplyBeam"]), False )
        method = parset.getString('.'.join(["LSMTool.Steps", step, "Method"]), 'mid' )
        pad_index = parset.getBool('.'.join(["LSMTool.Steps", step, "PadIndex"]), False )
        byPatch = parset.getBool('.'.join(["LSMTool.Steps", step, "ByPatch"]), False )
        facet = parset.getString('.'.join(["LSMTool.Steps", step, "Facet"]), '' )
        kernelSize = parset.getString('.'.join(["LSMTool.Steps", step, "KernelSize"]), '0.1' )
        nIterations = parset.getInt('.'.join(["LSMTool.Steps", step, "NIterations"]), '100' )
        lookDistance = parset.getString('.'.join(["LSMTool.Steps", step, "LookDistance"]), '0.2' )
        groupingDistance = parset.getString('.'.join(["LSMTool.Steps", step, "GroupingDistance"]), '0.01' )
    
        try:
            group(LSM, algorithm, targetFlux, numClusters, FWHM, threshold, applyBeam, root,
                  pad_index, method, facet, byPatch, float(kernelSize), nIterations,
                  float(lookDistance), float(groupingDistance))
            result = 0
        except Exception as e:
            log.error(e)
            result = 1
    
        # Write to outFile
        if outFile != '' and result == 0:
            LSM.write(outFile, clobber=True)
    
        return result
    
    
    def group(LSM, algorithm, targetFlux=None, patchNames=None, weightBySize=False,
              numClusters=100, FWHM=None, threshold=0.1, applyBeam=False, root='Patch',
              pad_index=False, method='mid', facet="", byPatch=False, kernelSize=0.1,
              nIterations=100, lookDistance=0.2, groupingDistance=0.01):
        """
        Groups sources into patches.
    
        Parameters
        ----------
        LSM : SkyModel object
            Input sky model
        algorithm : str
            Algorithm to use for grouping:
            - 'single' => all sources are grouped into a single patch
            - 'every' => every source gets a separate patch named 'source_patch'
            - 'cluster' => SAGECAL clustering algorithm that groups sources into
                specified number of clusters (specified by the numClusters parameter)
            - 'tessellate' => group into tiles whose total flux approximates
                the target flux (specified by the targetFlux parameter)
            - 'threshold' => group by convolving the sky model with a Gaussian beam
                and then thresholding to find islands of emission (NOTE: all sources
                are currently considered to be point sources of flux unity)
            - 'facet' => group by facets using as an input a fits file. It requires
                the use of the additional parameter 'facet' to enter the name of the
                fits file.
            - 'voronoi' => given a previously grouped sky model, Voronoi tesselate
                using the patch positions for patches above the target flux
                (specified by the targetFlux parameter) or whose names match the
                input names (specified by the patchNames parameter)
            - 'meanshift' => use the meanshift clustering algorithm
            - the filename of a mask image => group by masked regions (where mask =
                True). Sources outside of masked regions are given patches of their
                own
        targetFlux : str or float, optional
            Target flux for 'tessellate' (the total flux of each tile will be close
            to this value) and 'voronoi' algorithms. The target flux can be specified
            as either a float in Jy or as a string with units (e.g., '25.0 mJy')
        patchNames : list, optional
            List of patch names to use for the 'voronoi' algorithm. If both patchNames
            and targetFlux are given, the targetFlux selection is applied first
        weightBySize : bool, optional
            If True, fluxes are weighted by patch size (as median_size / size) when
            the targetFlux criterion is applied. Patches with sizes below the median
            (flux-weighted) size are upweighted and those above the median are
            downweighted
        numClusters : int, optional
            Number of clusters for clustering. Sources are grouped around the
            numClusters brightest sources
        FWHM : str or float, optional
            FWHM of convolving Gaussian used for thresholding. The FWHM can
            be specified as either a float in degrees or as a string with units
            (e.g., '25.0 arcsec')
        threshold : float, optional
            Value between 0 and 1 above which emission is considered for thresholding
        applyBeam : bool, optional
            If True, fluxes will be attenuated by the beam
        root : str, optional
            Root string from which patch names are constructed. For 'single', the
            patch name will be set to root; for the other grouping algorithms, the
            patch names will be 'root_INDX', where INDX is an integer ranging from
            (0:nPatches)
        pad_index : bool, optional
            If True, pad the INDX used in the patch names. E.g., facet_patch_001
            instead of facet_patch_1
        method : None or str, optional
            This parameter specifies the method used to set the patch positions:
            - 'mid' => the position is set to the midpoint of the patch
            - 'mean' => the positions is set to the mean RA and Dec of the patch
            - 'wmean' => the position is set to the flux-weighted mean RA and
            Dec of the patch
            - 'zero' => set all positions to [0.0, 0.0]
        facet : str, optional
            Facet fits file used with the algorithm 'facet'
        byPatch : bool, optional
            For the 'tessellate' or 'meanshift' algorithms, use patches instead of sources
        kernelSize : float, optional
            Kernel size in degrees for meanshift grouping
        nIterations : int, optional
            Number of iterations for meanshift grouping
        lookDistance : float, optional
            Look distance in degrees for meanshift grouping
        groupingDistance : float, optional
            Grouping distance in degrees for meanshift grouping
    
        Examples
        --------
        Tesselate the sky model into patches with approximately 30 Jy total
        flux:
    
            >>> LSM = lsmtool.load('sky.model')
            >>> group(LSM, 'tessellate', targetFlux=30.0)
    
        """
        from . import _tessellate
        from . import _cluster
        from . import _threshold
        try:
            from . import _meanshiftc as _meanshift
        except ImportError:
            from . import _meanshift
        import numpy as np
        import os
        from itertools import groupby
    
        if len(LSM) == 0:
            log.error('Sky model is empty.')
            return
    
        if algorithm.lower() == 'single':
            LSM.ungroup()
            addSingle(LSM, root)
    
        elif algorithm.lower() == 'every':
            LSM.ungroup()
            addEvery(LSM)
    
        elif algorithm.lower() == 'cluster':
            LSM.ungroup()
            patches = _cluster.compute_patch_center(LSM, applyBeam=applyBeam)
            patchCol = _cluster.create_clusters(LSM, patches, numClusters,
                                                applyBeam=applyBeam, root=root,
                                                pad_index=pad_index)
            LSM.setColValues('Patch', patchCol, index=2)
    
        elif algorithm.lower() == 'tessellate':
            if targetFlux is None:
                raise ValueError('Please specify the targetFlux parameter.')
            else:
                units = 'Jy'
                if type(targetFlux) is str:
                    parts = [''.join(g).strip() for _, g in groupby(targetFlux, str.isalpha)]
                    targetFlux = float(parts[0])
                    if len(parts) == 2:
                        units = parts[1]
            if byPatch:
                if 'Patch' not in LSM.table.keys():
                    raise ValueError('Sky model must be grouped before "byPatch" can be used.')
                x, y, midRA, midDec = LSM._getXY(byPatch=True)
                f = LSM.getColValues('I', units=units, applyBeam=applyBeam, aggregate='sum')
            else:
                LSM.ungroup()
                x, y, midRA, midDec = LSM._getXY()
                f = LSM.getColValues('I', units=units, applyBeam=applyBeam)
            vobin = _tessellate.bin2D(np.array(x), np.array(y), f, target_flux=targetFlux)
            try:
                vobin.bin_voronoi()
                patchCol = _tessellate.bins2Patches(vobin, root=root, pad_index=pad_index)
                if byPatch:
                    newPatchNames = patchCol.copy()
                    origPatchNames = LSM.getPatchNames()
                    patchCol = np.zeros(len(LSM), dtype='U100')
                    for newPatchName, origPatchName in zip(newPatchNames, origPatchNames):
                        ind = np.array(LSM.getRowIndex(origPatchName))
                        patchCol[ind] = newPatchName
                    LSM.setColValues('Patch', patchCol, index=2)
                else:
                    LSM.setColValues('Patch', patchCol, index=2)
            except ValueError:
                # Catch error in some cases with high target flux relative to
                # total model flux
                addSingle(LSM, root+'_0')
    
        elif algorithm.lower() == 'threshold':
            from astropy.coordinates import SkyCoord, Angle
    
            if threshold is None:
                threshold = 0.1
            if threshold < 0.01:
                threshold = 0.01
            if threshold > 1.0:
                threshold = 1.0
            if FWHM is None:
                raise ValueError('Please specify the FWHM parameter.')
            else:
                units = 'degree'
                if type(FWHM) is str:
                    parts = [''.join(g).strip() for _, g in groupby(FWHM, str.isalpha)]
                    FWHM = float(parts[0])
                    if len(parts) == 2:
                        units = parts[1]
                fwhmArcsec = Angle(FWHM, unit=units).to('arcsec').value
    
            patchCol = _threshold.getPatchNamesByThreshold(LSM, fwhmArcsec, threshold,
                root=root, pad_index=pad_index)
            LSM.setColValues('Patch', patchCol, index=2)
    
        elif algorithm.lower() == 'voronoi':
            from astropy.coordinates import SkyCoord
            import astropy.units as u
    
            if 'Patch' not in LSM.table.keys():
                raise ValueError('Sky model must be grouped before "voronoi" can be used.')
            else:
                dirs = LSM.getPatchPositions()
    
            if targetFlux is not None:
                # Select only those patches that lie above the target flux
                units = 'Jy'
                if type(targetFlux) is str:
                    parts = [''.join(g).strip() for _, g in groupby(targetFlux, str.isalpha)]
                    targetFlux = float(parts[0])
                    if len(parts) == 2:
                        units = parts[1]
                dirs_names = []
                names = LSM.getPatchNames()
                fluxes = LSM.getColValues('I', aggregate='sum', units=units, applyBeam=applyBeam)
                if weightBySize:
                    sizes = LSM.getPatchSizes(units='arcsec', weight=True, applyBeam=applyBeam)
                    sizes[sizes < 1.0] = 1.0
                    bright_ind = np.where(fluxes >= targetFlux)
                    medianSize = np.median(sizes[bright_ind])
                    weights = medianSize / sizes
                    weights[weights > 1.0] = 1.0
                    weights[weights < 0.5] = 0.5
                    fluxes *= weights
                for name, flux in zip(names, fluxes):
                    if flux >= targetFlux:
                        dirs_names.append(name)
            else:
                # Use all patches
                dirs_names = [name for name, d in dirs.items()]
    
            if patchNames is not None:
                # Select only those patches with names in patchNames
                dirs_names = [name for name in dirs_names if name in patchNames]
    
            # Do grouping
            if len(dirs_names) == 0:
                log.warning('No patches meet specified selection criteria. All sources '
                            'placed in a single patch.')
                LSM.ungroup()
                addSingle(LSM, root)
            else:
                dirs_ras = []
                dirs_decs = []
                for name in dirs_names:
                    d = dirs[name]
                    dirs_ras.append(d[0])
                    dirs_decs.append(d[1])
                RADeg = LSM.getColValues('Ra', units='degree')
                DecDeg = LSM.getColValues('Dec', units='degree')
                patchNames = []
                for r, d in zip(RADeg, DecDeg):
                    dists = SkyCoord(r*u.degree, d*u.degree).separation(
                        SkyCoord(dirs_ras*u.degree, dirs_decs*u.degree))
                    patchNames.append(dirs_names[np.argmin(dists)])
                LSM.setColValues('Patch', patchNames, index=2)
    
        elif algorithm.lower() == 'meanshift':
            if byPatch:
                if 'Patch' not in LSM.table.keys():
                    raise ValueError('Sky model must be grouped before "byPatch" can be used.')
                x, y, midRA, midDec = LSM._getXY(byPatch=True)
                f = LSM.getColValues('I', applyBeam=applyBeam, aggregate='sum')
            else:
                addEvery(LSM)
                x, y, midRA, midDec = LSM._getXY()
                f = LSM.getColValues('I', applyBeam=applyBeam)
            crdelt = 0.066667  # WCS delta in deg/pixel, as used by LSM._getXY()
            grouper = _meanshift.Grouper(list(zip(x, y)), f, kernelSize/crdelt, nIterations,
                                         lookDistance/crdelt, groupingDistance/crdelt)
            grouper.run()
            clusters = grouper.grouping()
            patchNames = LSM.getPatchNames()
            table = LSM.table.copy()
            for cindx, cluster in enumerate(clusters):
                if pad_index:
                    name = '{0}_{1}'.format(root, str(int(cindx)).zfill(int(np.ceil(np.log10(len(cluster)+1)))))
                else:
                    name = '{0}_{1}'.format(root, str(int(cindx)))
                patches = patchNames[cluster]
                for patchName in patches:
                    indices = LSM.getRowIndex(patchName)
                    table['Patch'][indices] = name
            LSM.table = table
            LSM._updateGroups()
    
        elif algorithm.lower() == 'facet':
            if os.path.exists(facet):
                RADeg = LSM.getColValues('Ra', units='degree')
                DecDeg = LSM.getColValues('Dec', units='degree')
                facet_col = get_facet_values(facet, RADeg, DecDeg, root=root, pad_index=pad_index)
                LSM.setColValues('Patch', facet_col, index=2)
            else:
                raise ValueError('Please enter the facet filename in the facet parameter.')
    
        elif os.path.exists(algorithm):
            # Mask image
            mask = algorithm
            RARad = LSM.getColValues('Ra', units='radian')
            DecRad = LSM.getColValues('Dec', units='radian')
            patchCol = getPatchNamesFromMask(mask, RARad, DecRad, root=root, pad_index=pad_index)
            LSM.setColValues('Patch', patchCol, index=2)
    
        else:
            raise ValueError('Grouping alogrithm not understood.')
    
        # Update table grouping and set default patch positions
        LSM._updateGroups()
        history = "algorithm = '{0}'".format(algorithm)
        if algorithm.lower() == 'cluster':
            history += ', numClusters = {0}'.format(numClusters)
        elif algorithm.lower() == 'tessellate' or algorithm.lower() == 'voronoi':
            history += ', targetFlux = {0}'.format(targetFlux)
        LSM._addHistory("GROUP ({0})".format(history))
        if algorithm.lower() != 'every':
            LSM.setPatchPositions(method=method, applyBeam=applyBeam)
        LSM._info()
        return 0
    
    
    def addSingle(LSM, patchName):
        """Add a Patch column with a single patch name"""
        import numpy as np
    
        uniformCol = np.array([patchName]*len(LSM.table))
        LSM.setColValues('Patch', uniformCol, index=2)
    
    
    def addEvery(LSM):
        """Add a Patch column with a different name for each source"""
        from .. import tableio
    
        names = LSM.getColValues('Name').copy()
        for i, name in enumerate(names):
            names[i] = name + '_patch'
        RAs = LSM.getColValues('Ra', units='degree')
        Decs = LSM.getColValues('Dec', units='degree')
        patchDict = {}
        for name, ra, dec in zip(names, RAs, Decs):
            patchDict.update({name: [tableio.RA2Angle(ra)[0], tableio.Dec2Angle(dec)[0]]})
        LSM.table.meta.update(patchDict)
        LSM.setColValues('Patch', names, index=2)
    
    
    def getPatchNamesFromMask(mask, RARad, DecRad, root='mask', pad_index=False):
        """
        Returns an array of patch names for each (RA, Dec) pair in radians
        """
        import pyrap.images as pim
        import scipy.ndimage as nd
        import numpy as np
    
        maskdata = pim.image(mask)
        maskval = maskdata.getdata()[0][0]
    
        act_pixels = maskval
        rank = len(act_pixels.shape)
        connectivity = nd.generate_binary_structure(rank, rank)
        mask_labels, count = nd.label(act_pixels, connectivity)
    
        patchNums = []
        patchNames = []
        for raRad, decRad in zip(RARad, DecRad):
            (a, b, _, _) = maskdata.toworld([0, 0, 0, 0])
            (_, _, pixY, pixX) = maskdata.topixel([a, b, decRad, raRad])
            try:
                # != is a XOR for booleans
                patchNums.append(mask_labels[int(pixY), int(pixX)])
            except:
                patchNums.append(0)
    
        # Check if there is a patch with id = 0. If so, this means there were
        # some Gaussians that fell outside of the regions in the patch
        # mask file.
        n = 0
        for p in patchNums:
            if p != 0:
                if pad_index:
                    patchNames.append('{0}_patch_'.format(root) +
                                      str(p).zfill(int(np.ceil(np.log10(len(set(patchNums))+1)))))
                else:
                    patchNames.append('{0}_patch_'.format(root)+str(p))
            else:
                patchNames.append('patch_'+str(n))
                n += 1
    
        return np.array(patchNames)
    
    
    def get_facet_values(facet, ra, dec, root="facet", default=0, pad_index=False):
        """
        Extract the value from a fits facet file
        """
        import numpy as np
        from astropy.io import fits
        from astropy.wcs import WCS
    
        # TODO: Check astropy version
        # TODO: Check facet is a fits file
    
        with fits.open(facet) as f:
            shape = f[0].data.shape
    
            w = WCS(f[0].header)
            if len(w.wcs.crval) == 4:
                freq = w.wcs.crval[2]
                stokes = w.wcs.crval[3]
                xe, ye, _1, _2 = w.all_world2pix(ra, dec, freq, stokes, 1)
            elif len(w.wcs.crval) == 2:
                xe, ye = w.all_world2pix(ra, dec, 1)
            else:
                raise ValueError('Input mask must have 2 axes (x, y) or 4 axes (x, y, freq, stokes).')
            x, y = np.round(xe).astype(int), np.round(ye).astype(int)
    
            # Dummy value for points out of the fits area
            x[(x < 0) | (x >= shape[-1])] = -1
            y[(y < 0) | (y >= shape[-2])] = -1
    
            if len(w.wcs.crval) == 4:
                data = f[0].data[0, 0, :, :]
            else:
                data = f[0].data[:, :]
    
            values = data[y, x]
    
            # Assign the default value to NaNs and points out of the fits area
            values[(x == -1) | (y == -1)] = default
            values[np.isnan(values)] = default
    
            if pad_index:
                return np.array(["{0}_{1}".format(root, str(int(val)).zfill(int(np.ceil(np.log10(len(set(values))+1))))) for val in values])
            else:
                return np.array(["{0}_{1}".format(root, int(val)) for val in values])