diff --git a/cluster.py b/cluster.py new file mode 100755 index 0000000000000000000000000000000000000000..3a51db60810b548d5fd2c776751b41f5c6537ebe --- /dev/null +++ b/cluster.py @@ -0,0 +1,517 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- +""" +Clustering based on presence of artifacts around sources + +Created on Tue Dec 10 20:48:31 2019 + +@author: kutkin +""" + +import os +import sys +import astropy.io.fits as fits +from astropy.wcs import WCS +from astropy.coordinates import SkyCoord, angles +from astropy.stats import median_absolute_deviation as mad +import astropy.units as u +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd +from matplotlib.patches import Circle, Rectangle, Ellipse + +# import copy + +import logging +logging.basicConfig(level=logging.DEBUG) + +def ra2deg(ra): + s = np.array(ra.split(':'), dtype=float) + if s[0] < 0.0: + sign = -1.0 + else: + sign = 1.0 + return sign*(abs(s[0]) + s[1]/60.0 + s[2]/3600.)*15 + + +def dec2deg(dec): + s = np.array(dec.split('.'), dtype=float) + sign = np.sign(s[0]) + if len(s) == 4: + return sign*(abs(s[0]) + s[1]/60.0 + s[2]/3600. + s[3]*10**(-len(str(s[3])))/3600.) + elif len(s) == 3: + return sign*(abs(s[0]) + s[1]/60.0 + s[2]/3600.) + + +def sep_radec(ra, dec, ra0, dec0): + c0 = SkyCoord(ra0, dec0, unit=u.deg) + c = SkyCoord(ra, dec, unit=u.deg) + return c0.separation(c).arcsec + + +def radec(ra, dec): + """ return SkyCoord object from ra, dec""" + ra = ra2deg(ra) + dec = dec2deg(dec) + # print ra, dec + return SkyCoord(ra, dec, unit=('deg, deg')) + + + +class Cluster(): + """ A cluster object """ + def __init__(self, name, center, radius): + """ + INPUT: + name -- cluster1, ... + center -- SkyCoord object with RA, Dec + radius -- radius (SkyCoord Angle) + """ + self.name, self.center, self.radius = name, center, radius + + # logging.debug(self.radius) + + # def separation(self, other): + # """ + # Separation between Cluster center and ra, dec in arcmin + # """ + # return self.center.separation(other.center) + + + def offset(self, radec): + """ + offset between cluster center and SkyCoord object (Angle) + """ + return self.center.separation(radec) + + + def intersects(self, other): + """ Does it intersect with the other """ + sep = self.center.separation(other.center) + rsum = self.radius + other.radius + # print res, rsum + if sep <= rsum: + return True + else: + return False + + +# TODO merging of the clusters + def merge(self, other, overwrite=True): + """ merge the Cluster with the other one """ + sep = self.center.separation(other.center) + rsum = self.radius + other.radius + if overwrite: + new_name = self.name + else: + new_name = '{}_{}'.format(self.name, other.name) + new_center = SkyCoord((self.center.ra*self.radius/rsum + other.center.ra*other.radius/rsum), + (self.center.dec*self.radius/rsum + other.center.dec*other.radius/rsum)) + new_radius = max((sep, self.radius, other.radius)) + + if sep > rsum: + logging.warning('Merging the non-intersecting clusters') + + return Cluster(new_name, new_center, new_radius) + + + def overplot(self, ax): + c = self.center + txt = self.name.lstrip('cluster') + circle = plt.Circle((c.ra.value, c.dec.value), self.radius.deg, + facecolor='none', edgecolor='r', transform=ax.get_transform('world')) + ax.text(c.ra.value, c.dec.value, '{}'.format(txt), + ha='center', va='center', + fontdict = {'weight':'bold','size': 13}, + transform=ax.get_transform('world')) + ax.add_artist(circle) + + + +def cluster_sources(df, cluster): + radius = cluster.radius + df['ra'] = df.Ra.apply(ra2deg) + df['dec'] = df.Dec.apply(dec2deg) + df['sep'] = sep_radec(df.ra, df.dec, cluster.center.ra, cluster.center.dec)/60.0 + # df['Patch'] + return df.query('sep<@radius') + + +def cluster_snr(df, cluster, wcs, resid_data, pix_arcmin_scale): + """ SNR of the model sources within a cluster """ + radius=cluster.radius.arcmin + a = cluster_sources(df, cluster) + # signal_max = a.I.max() + signal_sum = a.I.sum() + px, py = np.round(wcs.all_world2pix(cluster.center.ra, cluster.center.dec, 0)).astype(int) + # print(px, py) + # sys.exit() + y, x = np.mgrid[0:resid_data.shape[0], 0:resid_data.shape[1]] + radius_pix = radius/pix_arcmin_scale + mask = np.where((y-py)**2+(x-px)**2<=radius_pix**2) + noise = np.std(resid_data[mask]) + return signal_sum, signal_sum/noise + + +def write_df(df, clusters, output=None): + with open(output, 'w') as out: + out.write("Format = Name,Patch,Type,Ra,Dec,I,Q,U,V,SpectralIndex,LogarithmicSI,ReferenceFrequency='1399603271.48438',MajorAxis,MinorAxis,Orientation\n") + if not clusters: + logging.error('No clusters') + return -1 + for cluster in clusters: + df['sep'] = sep_radec(df.ra, df.dec, cluster.center.ra, cluster.center.dec) + clust = df.query('sep <= @cluster.radius.arcsec') + clust.loc[clust.index,'Patch'] = cluster.name + df.loc[clust.index, 'Patch'] = cluster.name + clusternum = cluster.name.lstrip('cluster') + with open(output, 'a') as out: + out.write(', {}, POINT, , , , , , , , , , , ,\n'.format(cluster.name)) + clust.to_csv(out, index=False, header=False, columns=df.columns[:-3]) + + clust = df.query('Patch == "cluster0"') + clusternum = int(clusternum) + 1 + restname = 'cluster' + str(clusternum) + clust.loc[clust.index,'Patch'] = restname + df.loc[clust.index, 'Patch'] = restname + with open(output, 'a') as out: + out.write(', {}, POINT, , , , , , , , , , , ,\n'.format(restname)) + clust.to_csv(out, index=False, header=False, columns=df.columns[:-3]) + + return 0 + + +def radial_profile(ra, dec, resid_img): + window = 180 + step = 10 + # sampling=200 + initial_radius = 15 + final_radius = window + rads = np.arange(initial_radius, final_radius, step) + with fits.open(resid_img) as f: + wcs = WCS(f[0].header).celestial + pix_arcmin_scale = f[0].header['CDELT2']*60 + resid_data = f[0].data[0,0,...] + c = radec(ra, dec) + px, py = wcs.all_world2pix(c.ra, c.dec, 0) + px, py = int(round(py)), int(round(px)) + res = np.zeros_like(rads, dtype=float) + for ind, rad in enumerate(rads): + sampling = int(1000 * rad / final_radius) + for angle in np.linspace(0, 2*np.pi, sampling): + x = int(rad * np.cos(angle)) + y = int(rad * np.sin(angle)) + d = resid_data[y+py:y+py+step, x+px:x+px+step] + res[ind] += np.nanmean(abs(d)) / sampling + return rads, res + + +def sector_max(ra, dec, resid_img, ax, nsectors=6): + r0 = 10 + r1 = 200 # pixels + with fits.open(resid_img) as f: + wcs = WCS(f[0].header).celestial + img_size = f[0].header['NAXIS1'] + pix_arcmin_scale = f[0].header['CDELT2']*60 + resid_data = f[0].data[0,0,...] + + mad_std = mad(resid_data) + + c = radec(ra, dec) + px, py = wcs.all_world2pix(c.ra, c.dec, 0) + px, py = int(round(px)), int(round(py)) + + y, x = np.mgrid[0:img_size,0:img_size] + x = x-px + y = y-py + radcond = np.logical_and(np.hypot(x,y)>r0, np.hypot(x,y)<r1) + sectors = np.linspace(-np.pi, np.pi, nsectors) + result = [] + + + # fig = plt.figure(figsize=[10,10]) + # ax = fig.add_subplot(1,1,1, projection=wcs.celestial) + # vmin, vmax = np.percentile(resid_data, 5), np.percentile(resid_data, 95) + # ax.imshow(resid_data, vmin=vmin, vmax=vmax, origin='lower')#cmap='gray', vmin=2e-5, vmax=0.1)#, norm=LogNorm()) + + for i in range(nsectors-1): + ang1, ang2 = sectors[i], sectors[i+1] + # angcond = np.logical_and(x*np.sin(ang1)/np.cos(ang1) < y, y <= x*np.sin(ang2)/np.cos(ang2)) + coef = np.arctan2(y,x) + angcond = np.logical_and(ang1 < coef, coef < ang2) + cond = np.logical_and(radcond, angcond) + res = np.nanmax(resid_data[cond]) / mad_std + result.append(res) + + cond2 = np.logical_and(cond, resid_data==max(resid_data[cond])) + yy, xx = np.argwhere(cond2)[0] + ra, dec = wcs.all_pix2world(xx, yy, 0) + # print yy, xx + if res > 10: + ax.plot(ra, dec, '.r', transform=ax.get_transform('world')) + + # tmp = np.argwhere(cond) + # for p in tmp: + # ax.plot(p[1], p[0], '.k') + # print ang1, ang2, len(cond[cond]) + + return np.array(result) + + + +def ellipses_coh(img, x0=None, y0=None, dr=None, amin=20, amax=100): + """ + Find an ellipse ring with the highest absolute mean pixels value. + Return: max of abs of mean of the pixels within various ellipses, + minor_axis/major_axis, major_axis, number of pixels within the ellipse + """ + + y, x = np.mgrid[0:img.shape[0], 0:img.shape[1]] + x0 = x0 or int(img.shape[1]/2) + y0 = y0 or int(img.shape[0]/2) + y, x = y-y0, x-x0 + + eccs = np.linspace(0.6, 1.0, 10) + arange = range(amin, amax) + + # drmin = 1.0 + # drmax = 40.0 + # OldRange = float(amax - amin) + # NewRange = float(drmax - drmin) + + + res = np.zeros((len(eccs), len(arange))) + for i, ecc in enumerate(eccs): + for j, a in enumerate(arange): + # dr = (((a - amin) * NewRange) / OldRange) + drmin + b = a*ecc + cond = np.logical_and(x**2*(a+dr)**2 + y**2*(b+dr)**2 < (a+dr)**2*(b+dr)**2, x**2*a**2 + y**2*b**2 >= a**2*b**2) + if len(cond[cond])>=50: + # res[i, j] = abs(sum(img[cond]))/len(img[cond]) + res[i, j] = abs(np.nanmean(img[cond])) + else: + logging.warning('{:d} pixels for dr={:.1f} a={:0.1f}, e={:.1f}'.format(len(cond[cond]), dr, a, ecc)) + res[i, j] = 0.0 + + + imax, jmax = np.argwhere(res==res.max())[0] + eccmax = eccs[imax] + amax = arange[jmax] + bmax = amax*eccmax + cond = np.logical_and(x**2*(amax+dr)**2 + y**2*(bmax+dr)**2 < (amax+dr)**2*(bmax+dr)**2, + x**2*amax**2 + y**2*bmax**2 >= amax**2*bmax**2) + # logging.debug('Ellipse size: {:d} pixels'.format(len(img[cond]))) + return res.max(), eccmax, amax, len(img[cond]) + + +def manual_clustering(fig, ax, wcs, pix_arcmin_scale, startnum=1): + + def get_cluster(cen, rad, name): + xc, yc = cen + x, y = rad + radius_pix = np.hypot(x-xc, y-yc) + radius = angles.Angle(radius_pix * pix_arcmin_scale, unit='arcmin') + ra, dec = wcs.all_pix2world(xc, yc, 0) + center = SkyCoord(ra, dec, unit='deg') + logging.info("Cluster {} at {} of {} radius".format(name, center, radius)) + return Cluster(name, center, radius) + + do = True + i = startnum + clusters = [] + while do: + logging.info('Select center and radius for the cluster. \ + Then press left button to continue or middle to skip. \ + Right button -- to cancel the last selection.') + inp = fig.ginput(3, timeout=-1) + cen, rad = inp[:2] + if len(inp) == 2: + do = False + cluster = get_cluster(cen, rad, 'cluster{}'.format(i)) + cluster.overplot(ax) + clusters.append(cluster) + i += 1 + + return clusters + + +def auto_clustering(fig, ax, df, wcs, resid_data, pix_arcmin_scale, nbright, + cluster_radius, cluster_overlap, boxsize=250, nclusters=5): + + a = df.sort_values('I')[::-1][:nbright][['Ra', 'Dec', 'I']] + clusters = [] # + csnrs = [] + cfluxes = [] + cmeasures = [] # the main value to clusterize by + cellipse_params = [] + + fmin, fmax = min(a.I), max(a.I) + + rms = mad(resid_data) + resid_mean = np.mean(resid_data) + + if nclusters == 'auto': + logging.info('Number of clusters will be determined automatically') + else: + logging.info('Maximum number of clusters is {}'.format(nclusters)) + + logging.info('Getting measures for the potential clusters...') + src_index = 1 + cluster_index = 1 + + for ra, dec, flux in a.values: + c = radec(ra, dec) + px, py = np.round(wcs.all_world2pix(c.ra, c.dec, 0)).astype(int) + # print(px, py) + # print(src_index, ra, dec, px, py, flux) + src_index += 1 + +# skip the edge sources + if (abs(px-resid_data.shape[1]) < boxsize)or (abs(py-resid_data.shape[0]) < boxsize): + logging.debug('Skipping the edge source') + continue + +# Check if the component already in a cluster + if clusters and any([c0.offset(c).arcmin<=cluster_radius.value*cluster_overlap for c0 in clusters]): + # logging.debug('Already in a cluster') + continue + + small_resid = resid_data[py-boxsize:py+boxsize, px-boxsize:px+boxsize] + ellipse_mean, ecc, amaj, numpix = ellipses_coh(small_resid, amin=20, amax=boxsize-1, dr=1.0) + # print ellipse_mean, ecc, amaj, numpix + + + + # if abs(ring_mean/resid_mean) > 1.7e5: + # if abs(ring_mean/small_resid_mean) > 1e4: + + if nclusters=='auto': + if abs(ellipse_mean/rms) > 1.4: + rect = plt.Rectangle((px-boxsize, py-boxsize), 2*boxsize, 2*boxsize, + lw=2, color='k', fc='none') + ellipse = Ellipse(xy=(px,py), width=2*amaj*ecc, height=2*amaj, + angle=0, lw=3, color='gray', fc='none', alpha=0.5) + ax.add_artist(rect) + ax.add_artist(ellipse) + cluster_name = 'cluster{}'.format(cluster_index) + cluster = Cluster(cluster_name, c, cluster_radius) + csnr = cluster_snr(df, cluster, wcs, resid_data, pix_arcmin_scale)[1] + if csnr < 100: # skip clusters with low SNR + logging.debug('Skipping low SNR cluster at {}'.format(cluster.center)) + continue + clusters.append(cluster) + cluster.overplot(ax) + print(cluster_name, ra, dec, csnr, boxsize) + cluster_index += 1 + else: + cluster_name = 'cluster{}'.format(src_index) + cluster = Cluster(cluster_name, c, cluster_radius) + cflux, csnr = cluster_snr(df, cluster, wcs, resid_data, pix_arcmin_scale) + clusters.append(cluster) + cfluxes.append(cflux) + csnrs.append(csnr) + cmeasures.append(abs(ellipse_mean/resid_mean)) + cellipse_params.append([amaj, ecc, numpix]) + + if nclusters == 'auto': + return clusters + else: + indexes = np.argsort(cmeasures)[::-1][:nclusters] + + final_clusters = [] + logging.info('Picking {} clusters'.format(nclusters)) + + for i in indexes: + cmeasure = cmeasures[i] + cluster = clusters[i] + amaj, ecc, npix = cellipse_params[i] + csnr = csnrs[i] + cflux = cfluxes[i] + + if csnr < 100: # skip clusters with low SNR + logging.debug('Skipping low SNR cluster at {}'.format(cluster.center)) + continue + + cluster.name = 'cluster{}'.format(cluster_index) + print(cluster.name, ra, dec, csnr, cmeasure) + + px, py = wcs.all_world2pix(cluster.center.ra, cluster.center.dec, 0) + px, py = int(px), int(py) + + rect = plt.Rectangle((px-boxsize, py-boxsize), 2*boxsize, 2*boxsize, + lw=2, color='k', fc='none') + ellipse = Ellipse(xy=(px,py), width=2*amaj*ecc, height=2*amaj, + angle=0, lw=3, color='gray', fc='none', alpha=0.5) + ax.add_artist(rect) + ax.add_artist(ellipse) + + final_clusters.append(cluster) + cluster.overplot(ax) + cluster_index += 1 + + return final_clusters + + +def main(img, resid, model, auto=True, add_manual=False, nclusters=5, boxsize=250, + nbright=80, cluster_radius=5, cluster_overlap=1.6): + + path = os.path.split(os.path.abspath(img))[0] + output = os.path.join(path, 'clustered.txt') + + df = pd.read_csv(model, skipinitialspace=True) + df['ra'] = df.Ra.apply(ra2deg) + df['dec'] = df.Dec.apply(dec2deg) + + df.insert(1, 'Patch', 'cluster0') + df.insert(6, 'Q', 0) + df.insert(7, 'U', 0) + df.insert(8, 'V', 0) + + image_data = fits.getdata(img)[0,0,...] + resid_data = fits.getdata(resid)[0,0,...] + with fits.open(img) as f: + wcs = WCS(f[0].header).celestial + pix_arcmin_scale = f[0].header['CDELT2']*60 + # racen = f[0].header['CRVAL1'] + # deccen = f[0].header['CRVAL2'] + cluster_radius = angles.Angle(cluster_radius, unit='arcmin') + fig = plt.figure(figsize=[12,12]) + ax = fig.add_subplot(1,1,1, projection=wcs.celestial) + vmin, vmax = np.percentile(image_data, 5), np.percentile(image_data, 95) + ax.imshow(resid_data, vmin=vmin, vmax=vmax, origin='lower')#cmap='gray', vmin=2e-5, vmax=0.1)#, norm=LogNorm()) + if auto: + clusters = auto_clustering(fig, ax, df, wcs, resid_data, pix_arcmin_scale, nbright, cluster_radius, + cluster_overlap, boxsize=boxsize, nclusters=nclusters) + if add_manual: + clusters_man = manual_clustering(fig, ax, wcs, pix_arcmin_scale, startnum=len(clusters)+1) + clusters = clusters + clusters_man + else: + clusters = manual_clustering(fig, ax, wcs, pix_arcmin_scale) + + if clusters: + write_df(df, clusters, output=output) + fig.tight_layout() + fig.savefig(path+'/clustering.png') + + +### if __name__ == "__main__": +if __name__ == "__main__": + + # base = '/home/kutkin/apertif/clustering/191209026/01/dical2-' + # base = '/home/kutkin/apertif/clustering/191209026/15/dical2-' + # base = '/home/kutkin/apertif/clustering/191010042/00/dical2-' + # base = '/home/kutkin/apertif/clustering/191006041_21/dical2-' + # base = '/home/kutkin/apertif/clustering/191010042_25/dical-' + # base = '/home/kutkin/apertif/clustering/191209026/10/dical2-' + base = '/home/kutkin/apertif/clustering/191010041/23/dical2-' + # base = '/home/kutkin/apertif/clustering/190915041/25/dical2-' + + img = base + 'image.fits' + resid = base + 'residual.fits' + model = base + 'sources.txt' + # img = sys.argv[1] + # resid = sys.argv[2] + # model = sys.argv[3] + + main(img, resid, model, auto=True, nclusters='auto') +