Skip to content
Snippets Groups Projects
Commit f0757eb0 authored by Felix  Semler's avatar Felix Semler
Browse files

clean up

parent 50edf5a4
No related branches found
No related tags found
1 merge request!2Merge Master
#!/usr/bin/env python
#
# $Id: MSFieldCompress.py,v 1.3 2008/04/21 21:09:41 schoenma Exp $
#
# Script: MSFieldCompress.py
# Author: A.P. Schoenmakers (ASTRON)
#
# This script compresses the FIELD and SOURCE table of a Measurement Set.
# It leaves only the unique entries in these tables and adapts the FIELD_ID
# references in the MAIN table.
# This is convenient if the observation was done using a mosaic
# pattern such as A -> B -> C -> A -> B -> C etc. All these pointings
# end up individually in the FIELD and SOURCE table. When converting the
# MS to UVFits, this would lead to a large SU table in the UVFits
# file. By compressing the list in the FIELD and SOURCE table beforehand, the
# created SU table is much shorter. This makes it much easier to
# collect all data related to a single pointing.
#
# Usage: MSFieldCompress.py <MSName>
#
# IMPORTANT: The input MS will be adapted; reversing the process is not
# possible!
#
# This we need for commandline handling and file checking
import os,sys
# This we need for MS interfacing
from pyrap.tables import *
# Numpy has several methods for array comparison and index finding
from numpy import *
def MSFieldCompress(ms):
# Open the FIELD table.
field = table(ms.getkeyword('FIELD'),readonly=False,ack=False)
fieldname = field.name()
fieldrows = field.nrows()
if (fieldrows == 1):
print "Field table only has one row; nothing to do!"
sys.exit(0)
# The NAME column holds the names of the mosaicing position. Multiple
# entries can have the same NAME.
fieldnames = array(field.getcol("NAME"))
uniq_fieldname = list(set(fieldnames));
new_fieldrows = len(uniq_fieldname)
if (new_fieldrows == fieldrows):
print "FIELD table already contains unique entries only; nothing to do!"
sys.exit(0)
print "Compressing FIELD subtable from "+str(fieldrows)+" to "+str(new_fieldrows)+" entries"
# old_index holds the current entries in the FIELD table, new_index will
# contain the indices of the remaining (unique) entries.
old_index = arange(len(fieldnames))
new_index = copy(old_index)
# Create a temporary compressed FIELD table, this will be copied into the
# input MS later.
field.copy("TMP_FIELD",copynorows=True)
tmp_field = table("TMP_FIELD",readonly=False,ack=False)
# Check the SOURCE table.
source = table(ms.getkeyword('SOURCE'),readonly=False,ack=False)
sourcename = source.name()
sourcerows = source.nrows()
if (sourcerows != fieldrows and sourcerows > 0):
print "Number of entries in SOURCE table does not match that in FIELD table;"
print "Cannot proceed"
exit(1)
# Create a temporary source table
source.copy("TMP_SOURCE",copynorows=True)
tmp_source = table("TMP_SOURCE",readonly=False,ack=False)
# Create the array new_index that holds the new index for each entry
# in the FIELD table.
# Also, copy each unique name entry in the FIELD table to the temporary
# table. Also for SOURCE entries if there is a valid SOURCE table.
for index,uf in enumerate(uniq_fieldname):
match = where(fieldnames == uf)
new_index[match] = index
old_row = match[0][0] # First match
field.copyrows(tmp_field,startrowin=old_row,nrow=1)
if (sourcerows > 0):
tmp_field.putcell("SOURCE_ID",index,index)
source.copyrows(tmp_source,startrowin=old_row,nrow=1)
tmp_source.putcell("SOURCE_ID",index,index)
field.close()
del field
source.close()
del source
# Copy the compressed FIELD table to the input MS
tmp_field.flush()
tmp_field.copy(fieldname,deep=True)
tmp_field.close()
tabledelete("TMP_FIELD",ack=False)
# Copy the compressed SOURCE table to the input MS
tmp_source.flush()
tmp_source.copy(sourcename,deep=True)
tmp_source.close()
tabledelete("TMP_SOURCE",ack=False)
# Now adapt the FIELD_ID pointers in the MAIN table
field_id = ms.getcol("FIELD_ID")
tmp = copy(field_id)
for i in old_index:
tmp[field_id == i] = new_index[i]
ms.putcol("FIELD_ID",tmp)
ms.flush()
#
# MAIN program starts here
#
if (len(sys.argv) == 2):
msname = sys.argv[1]
else:
print "Usage:"
print "\t"+sys.argv[0]+" <MS>"
print "\t <MS> is required"
sys.exit(1)
if (os.path.isdir(msname) == 0):
print "Could not find MS: "+msname
sys.exit(1)
ms = table(msname,readonly=False,ack=False)
MSFieldCompress(ms)
ms.close()
print "Done"
sys.exit(0)
#!/usr/bin/env python
#
# $Id: MSsplit.py,v 1.7 2008/08/19 08:59:21 schoenma Exp $
#
# Script to Split a multifreq/posmos/freqmos MS.
#
# Useage: MSsplit.py <MSname>
#
# If the MS is a frequency mosaic, it will be split in separate frequency
# mosaic points. The new MSes will be called <rootMSname>_FM<x>.MS
# where <x> the number of the frequency mosaic point. Only if there is
# valid data for this point in the original MS, a new MS will be created.
#
# For position mosaics, the new files will be named like
# <rootMSname>_PM<x>.MS. Again, only if there is valid data in the original
# MS a new MS will be created.
#
# For frequency-position mosaics, only the frequency groups are splitted out.
# To also split out the position points, each frequency-group MS has to be
# fed to the MSsplit.py program
#
# Important:
# Since we use the MSinfo tool that uses the NFRA_TMS_PARAMETERS table,
# an already splitted MS will be tried to be split again (creating one MS
# with a new name). This is because the NFRA... table is not changed while
# splitting. The DATA_DESCRIPTION, SPECTRAL_WINDOW, FEED and MAIN table are
# changed if necessary in the created MS.
#
# This script uses the pyrap_tables Python layer to the casacore package.
#
# Get required modules
import os,sys
from optparse import OptionParser
from pyrap.tables import *
# Function to return the indices where the items in a list match a value
#
# Example:
# a = [1,2,3,4,4,6,7,8]
# findall(a,4) returns [3,4]
def findall(array_in, value):
list_out = []
list_in = list(array_in)
i = -1
try:
while 1:
i = list_in.index(value, i+1)
list_out.append(i)
except ValueError:
pass
return list_out
# Function that returns a new list with the values of the elements with
# an index in a given indexlist of a given list
#
# Example:
# a = [1,2,3,4,5,6,7,8]
# indx = [0,2,4]
# makelist(a,indx) returns [1,3,5]
def makelist(list_in,indexlist):
list_out = []
for index in indexlist:
list_out.append(list_in[index])
return list_out
# Function that sets new values at given indices in a list
# list_in is the list in which to replace the values
#
# Example:
# a = [1,2,3,4,5,6,7,8]
# indx = [0,2,4]
# setvalue(a,0,indx) changes a into [0,2,0,4,0,6,7,8]
def setvalue(list_in,newvalue,indexlist):
for index in indexlist:
list_in[index] = newvalue
# Function to get the number of position mosaic points and the number
# of frequency mosaic points of an observation. This uses the output
# of the 'MSinfo' program, which must therefore be available (either from
# an active AIPS++ version, or from another separate build).
# Returns a dictionary with keys 'nposmos' and 'nfreqmos'
def get_msinfo(msname):
cmd = "MSinfo -in "+msname+" mode=raw silent=1";
put,get,err = os.popen3(cmd);
msinfo = get.readlines()
if (len(err.readlines()) != 0 or len(msinfo) == 0):
print "Could not run command: "+cmd
sys.exit(1)
nfreqmos = 0
nposmos = 0
for indx in range(len(msinfo)):
val = msinfo[indx].strip()
vals = val.split('=');
if (vals[0] == "NFreqMos"):
nfreqmos = int(vals[1])
elif (vals[0] == "NPositions"):
nposmos = int(vals[1])
# print "FM: "+str(nfreqmos)+" PM: "+str(nposmos)
if (nfreqmos == 0 or nposmos == 0):
print "Could not find mosaicing information from MSinfo"
sys.exit(1)
msinfo = {}
msinfo['nposmos'] = nposmos
msinfo['nfreqmos'] = nfreqmos
return msinfo
# Function to correct the Feed table. Arguments are the name of the MS, and
# the table object of the MS (writeable!).
def MSCorrectFeed(ms):
# In case of a combination of X/Y and L/R frontends (e.g, 6 and 13 cm) in a
# single observation, the FEED table contains n*nANT rows with n > 1. But we
# need to create a FEED table with only nAnt entries in the splitted MSes.
# Because the FEEDn columns in the main table refer to a separate column
# FIEED_ID in the FEED table (and not a rownumber!), the FEED table
# rows are not adapted, nor are the FEEDn values in the MAIN table.
# Find the unique values in the FEED1 column in the MAIN table. The input
# is a splitted MS, remember!
feedcolvalues = ms.getcol("FEED1")
uniq_feedcolvalues = list(set(feedcolvalues))
# Get the values from the FEED table column FEED_ID
tmpname = ms.name()+"/FEED"
feedtable = table(tmpname,ack=False)
feed_id = feedtable.getcol("FEED_ID")
# 'match' holds all indexes of the FEED_ID column in the FEED table whose
# values are equal to one of the unique values of FEED1 in MAIN.
# Only those rows with index in 'match' will be kept.
keep_rows = []
for i in range(len(uniq_feedcolvalues)) :
match = findall(feed_id,uniq_feedcolvalues[i])
keep_rows += match
keep_rows = set(keep_rows)
all_rows = set(range(feedtable.nrows()))
# The ^ is an operator that selects all items from all_rows that are
# not present in keep_rows; only works on sets, not on lists. The result
# is a new set (with less entries).
# Logic is: All rows that should not be kept must be removed.
del_rows = list(all_rows ^ keep_rows)
feedtable.removerows(del_rows)
feedtable.close()
# Delete the object to release all filelocks etc. If you do not do this
# copying to this table will result in unpredictable table entries...!
del feedtable
def MSCorrectSpW(ms):
# This function is needed so that the Spectral Window table matches the
# values as present in the DATA_DESCRIPTOR table.
# If there are more lines in the SW-table, than that there are referred to
# in the DATA_DESCRIPTOR table, the ms2uvfits
# converter believes the MS is a multi-IF dataset and writes the
# wrong frequency in the FQ table.
# The function creates a new shorter Spectral_window table and adapts the
# pointers to the entries in the SW-table in the DATA_DESCRIPTOR table.
#
# It must be run after the data descriptor table has been minimized with
# function MSCorrectDataDesc.
# First find the unique values in the SPW_ID column in the DataDesc table.
dd = table(ms.getkeyword('DATA_DESCRIPTION'),readonly=False,ack=False);
dd_spwid = dd.getcol("SPECTRAL_WINDOW_ID");
uniq_ddspwid = list(set(dd_spwid))
# Create a temporary (empty table) copy of the spw table in file tmpspw
spwtable = table(ms.getkeyword('SPECTRAL_WINDOW'),readonly=False,ack=False);
# For each unique SPW_ID value in the DD-table, the associated row
# in the SPECTRAL_WINDOW table is kept. We also adapt the
# values of the SPW_ID in the DD-table to match it with the new rownumber
# (which starts at 0, of course)
keep_rows = []
for i in range(len(uniq_ddspwid)):
match = findall(dd_spwid,uniq_ddspwid[i])
for j in range(len(match)):
dd.putcell("SPECTRAL_WINDOW_ID",match[j],i)
keep_rows += [uniq_ddspwid[i]]
keep_rows = set(keep_rows)
all_rows = set(range(spwtable.nrows()))
# The ^ is an operator that selects all items from all_rows that are
# not present in keep_rows; only works on sets, not on lists. The result
# is a new set (with less entries).
# Logic is: All rows that should not be kept must be removed.
del_rows = list(all_rows ^ keep_rows)
spwtable.removerows(del_rows)
spwtable.close()
# Delete the object to release all filelocks etc. If you do not do this
# copying to this table will result in unpredictable table entries...!
del spwtable
# Clean up
dd.flush()
dd.close()
# Function to reduce the Data_Descriptor table. Arguments are the name of the
# MS, and the table object of the MS (writeable!)
def MSCorrectDataDesc(ms):
# This method is needed to change the DataDescriptor table so it
# contains only the rows referred to from the MS MAIN
# table. The entries DATA_DESC_ID in the MAIN table are adapted
# to point to the new entries (rownrs) in the new data descriptor table.
# This method changes the number of entries in the DD table, but not the
# values in the table (such as SPECTRAL_WINDOW_ID); that will be done later
# (see method MSCorrectSpW).
# Find the unique values in the DATA_DESC_ID column in
# the MAIN table.
ddid = ms.getcol("DATA_DESC_ID")
uniq_ddid = list(set(ddid))
# Open the DATA_DESCRIPTION table of the splitted MS
dd_table = table(ms.getkeyword('DATA_DESCRIPTION'),readonly=False,ack=False);
# Keep the relevant rows, only, from the original table.
# Also adapt the DD_ID column in the MAIN table so the values point to the
# new rownumbers (which start at 0).
new_ddid = ms.getcol("DATA_DESC_ID");
new_row = 0
keep_rows = []
for i in range(len(uniq_ddid)):
match = findall(ddid,uniq_ddid[i])
setvalue(new_ddid,i,match)
keep_rows += [uniq_ddid[i]]
keep_rows = set(keep_rows)
all_rows = set(range(dd_table.nrows()))
# The ^ is an operator that selects all items from all_rows that are
# not present in keep_rows; only works on sets, not on lists. The result
# is a new set (with less entries).
# Logic is: All rows that should not be kept must be removed.
del_rows = list(all_rows ^ keep_rows)
dd_table.removerows(del_rows)
dd_table.close()
# Delete the object to release all filelocks etc. If you do not do this
# copying to this table will result in unpredictable table entries...!
del dd_table
# Write adapted data_descriptor_id values in the DATA_DESC_ID column in the
# MAIN table; these reflect the new rownrs in the adapted DATA_DESCRIPTION
# table.
ms.putcol("DATA_DESC_ID",new_ddid)
# Only flush() the MAIN table for now, but do not close() it; we need it
# later!
ms.flush()
# Function to reduce the SYSCAL table. Arguments are the name of the
# MS, and the table object of the MS (writeable!)
def MSCorrectSyscal(ms):
# The syscal table holds for each antenna, band and integration time the
# system temperatures and such. Therefore it has a columns SPECTRAL_WINDOW_ID,
# to refer in each row to the proper Band. As the SPECTRAL_WINDOW table will
# change, we also must adapt these values in the SYSCAl table. We also remove
# all entries which are not relevant to this MS (i.e., of bands whose data
# is not present in the MAIN table anymore.
# The method must be run after the data descriptor table has been minimized
# with function MSCorrectDataDesc.
# First find the unique values in the SPW_ID column in the DataDesc table.
dd = table(ms.getkeyword('DATA_DESCRIPTION'),readonly=True,ack=False);
dd_spwid = dd.getcol("SPECTRAL_WINDOW_ID");
uniq_ddspwid = list(set(dd_spwid))
# Find out which rows to delete from the syscal table
syscal=table(ms.getkeyword('SYSCAL'),readonly=False,ack=False);
syscal_spwid = syscal.getcol("SPECTRAL_WINDOW_ID");
uniq_syscalspwid = list(set(syscal_spwid))
keep_rows = [] # list that contains rows to keep in syscal
for i in range(len(uniq_ddspwid)):
match = findall(syscal_spwid,uniq_ddspwid[i])
keep_rows += match
keep_rows = set(keep_rows)
all_rows = set(range(syscal.nrows()))
# The ^ is an operator that selects all items from all_rows that are
# not present in keep_rows; only works on sets, not on lists. The result
# is a new set (with less entries).
# Logic is: All rows that should not be kept must be removed.
del_rows = list(all_rows ^ keep_rows)
syscal.removerows(del_rows)
syscal.flush()
# Now re-index the Spectral_window_id column value in the remaining rows.
# These must start from 0 and run upto the remaining number of
# SPECTRAL_WINDOW entries in the MS. As the remaining entries in the SYSCAL
# should be consecutive, we only need to lower these values so they start
# from 0.
syscal_spwid = syscal.getcol("SPECTRAL_WINDOW_ID");
uniq_syscalspwid = list(set(syscal_spwid))
new_spwid = range(len(uniq_syscalspwid))
for i in new_spwid:
match = findall(syscal_spwid,uniq_syscalspwid[i])
setvalue(syscal_spwid,i,match)
syscal.putcol("SPECTRAL_WINDOW_ID",syscal_spwid)
syscal.close()
def MSCorrectSubtables(msname):
ms = table(msname,readonly=False,ack=False)
MSCorrectDataDesc(ms)
MSCorrectSyscal(ms)
MSCorrectFeed(ms)
MSCorrectSpW(ms)
ms.close()
del ms
def MSSplitFreq(msname):
ms = table(msname,readonly=True,ack=False);
ms_spw = table(ms.getkeyword('SPECTRAL_WINDOW'),ack=False);
ddid_col = ms.getcol('DATA_DESC_ID');
uniq_ddid = list(set(ddid_col));
# Find the names of the Spectral_window entries; use these to find
# out the number of Spectral_window 'groups' (a number of rows)
spw_names = ms_spw.getcol('NAME');
uniq_spwnames = list(set(spw_names));
n_uniq_spw = len(uniq_spwnames);
n_spwgroups = len(spw_names)/n_uniq_spw;
rootmsname = msname.split(".")[0]
newNames = []
print 'OK - Try to split',msname,'in Frequency groups';
for i in range(n_spwgroups):
x0 = i*n_uniq_spw;
x1 = (i+1)*n_uniq_spw;
querycmd = 'DATA_DESC_ID in ['+str(x0)+':'+str(x1)+']'
sub_ms = ms.query(querycmd)
# if there is valid data for this frequency mosaic point, create a new
# MS
newName = rootmsname+'_FM'+str(i)+'.MS';
if (sub_ms.nrows() > 0):
print 'Create: '+newName;
sub_ms.copy(newName, deep=True)
# Now open the new MS as writeable, and correct the administration
MSCorrectSubtables(newName)
newNames.append(newName)
else:
print 'Not creating '+newName+' as there are no datapoints for this MS'
ms_spw.close()
ms.close()
return newNames;
def MSSplitPos(msname):
ms = table(msname,ack=False);
# Open the FIELD subtable
ms_field = table(ms.getkeyword('FIELD'),ack=False);
nfields = ms_field.nrows();
rootmsname = msname.split(".")[0]
newNames = []
print 'OK - try to split',msname,'in Positions';
for i in range(nfields):
newName = rootmsname+'_PM'+str(i)+'.MS'
querycmd = "FIELD_ID == " + str(i)
sub_ms = ms.query(querycmd);
if (sub_ms.nrows() > 0):
print 'Create: '+newName;
sub_ms.copy(newName, deep=True);
newNames.append(newName);
ms_field.close()
ms.close()
return newNames
def MSSplitTimeranges(msname,NrofTimeranges):
ms = table(msname,ack=False);
scans = ms.getcol("SCAN_NUMBER")
rowsperscan = ms.nrows()/(scans[ms.nrows()-1] - scans[0])
scansperMS = int(ms.nrows()/(NrofTimeranges*rowsperscan)) + 1
rootmsname = msname.split(".")[0]
newNames = []
print 'OK - Try to split',msname,'in',NrofTimeranges,'parts';
for i in range(NrofTimeranges):
startscan = scans[0] + i*scansperMS # Starts at 1 in the MS
endscan = scans[0] - 1 + (i+1)*scansperMS
if (endscan > scans[ms.nrows()-1]): endscan = scans[ms.nrows()-1]
if (endscan >= startscan):
querycmd = 'SCAN_NUMBER in ['+str(startscan)+':'+str(endscan)+']'
sub_ms = ms.query(querycmd)
# if there is valid data create a new MS
newName = rootmsname+'_T'+str(i)+'.MS';
if (sub_ms.nrows() > 0):
print 'Create: '+newName+' having '+str(endscan-startscan+1)+' samples';
sub_ms.copy(newName, deep=True)
newNames.append(newName)
else:
print 'Not creating '+newName+' as there are no datapoints for this MS'
ms.close()
return newNames;
def cmdparse():
usage = "%prog <MS> [<MS> <MS> <MS> etc.] [n]\n\t- <MS>: (one or more) Measurement Set names is required.\n\t- [n] : (optional) will split the MS(es) in n timeranges."
parser = OptionParser(usage)
(options, args) = parser.parse_args()
return args
# Here starts the main program
if __name__ == "__main__":
args = cmdparse()
if (len(args) == 0):
print " MSsplit.py -h gives help info"
sys.exit(0)
try:
numberMS = int(args[len(args)-1])
except (ValueError, IndexError):
numberMS = None
if (len(args) > 1 and numberMS >= 0):
if (numberMS > 1):
for arg in range(len(args)-1):
msname = args[arg];
# Check presence of MS
if (os.path.isdir(msname) == 0):
print "Could not find MS: "+msname
else:
newFiles = MSSplitTimeranges(msname,numberMS)
else:
print "Not splitting the file into less than 2 timeranges"
else:
for arg in range(len(args)):
msname = args[arg];
# Check presence of MS
if (os.path.isdir(msname) == 0):
print "Could not find MS: "+msname
else:
# Get the info on MS using MSinfo
msinfo = get_msinfo(msname)
# Frequency splitting must be done before position splitting, as the
# ms2uvfits converter cannot handle freq.mos data, but can handle posmos
if (msinfo['nfreqmos'] > 1):
newFiles = MSSplitFreq(msname)
elif(msinfo['nposmos'] > 1):
newFiles = MSSplitPos(msname)
if (len(newFiles) > 0):
print "Done. Created "+str(len(newFiles))+ " MSes."
else:
print "No new files created."
#!/usr/bin/env python
#
# $Id: MSsplit.py,v 1.7 2008/08/19 08:59:21 schoenma Exp $
#
# Script to Split a multifreq/posmos/freqmos MS.
#
# Useage: MSsplit.py <MSname>
#
# If the MS is a frequency mosaic, it will be split in separate frequency
# mosaic points. The new MSes will be called <rootMSname>_FM<x>.MS
# where <x> the number of the frequency mosaic point. Only if there is
# valid data for this point in the original MS, a new MS will be created.
#
# For position mosaics, the new files will be named like
# <rootMSname>_PM<x>.MS. Again, only if there is valid data in the original
# MS a new MS will be created.
#
# For frequency-position mosaics, only the frequency groups are splitted out.
# To also split out the position points, each frequency-group MS has to be
# fed to the MSsplit.py program
#
# Important:
# Since we use the MSinfo tool that uses the NFRA_TMS_PARAMETERS table,
# an already splitted MS will be tried to be split again (creating one MS
# with a new name). This is because the NFRA... table is not changed while
# splitting. The DATA_DESCRIPTION, SPECTRAL_WINDOW, FEED and MAIN table are
# changed if necessary in the created MS.
#
# This script uses the pyrap_tables Python layer to the casacore package.
#
# Get required modules
import os,sys
from optparse import OptionParser
from pyrap.tables import *
# Function to return the indices where the items in a list match a value
#
# Example:
# a = [1,2,3,4,4,6,7,8]
# findall(a,4) returns [3,4]
def findall(array_in, value):
list_out = []
list_in = list(array_in)
i = -1
try:
while 1:
i = list_in.index(value, i+1)
list_out.append(i)
except ValueError:
pass
return list_out
# Function that returns a new list with the values of the elements with
# an index in a given indexlist of a given list
#
# Example:
# a = [1,2,3,4,5,6,7,8]
# indx = [0,2,4]
# makelist(a,indx) returns [1,3,5]
def makelist(list_in,indexlist):
list_out = []
for index in indexlist:
list_out.append(list_in[index])
return list_out
# Function that sets new values at given indices in a list
# list_in is the list in which to replace the values
#
# Example:
# a = [1,2,3,4,5,6,7,8]
# indx = [0,2,4]
# setvalue(a,0,indx) changes a into [0,2,0,4,0,6,7,8]
def setvalue(list_in,newvalue,indexlist):
for index in indexlist:
list_in[index] = newvalue
# Function to get the number of position mosaic points and the number
# of frequency mosaic points of an observation. This uses the output
# of the 'MSinfo' program, which must therefore be available (either from
# an active AIPS++ version, or from another separate build).
# Returns a dictionary with keys 'nposmos' and 'nfreqmos'
def get_msinfo(msname):
cmd = "MSinfo -in "+msname+" mode=raw silent=1";
put,get,err = os.popen3(cmd);
msinfo = get.readlines()
if (len(err.readlines()) != 0 or len(msinfo) == 0):
print "Could not run command: "+cmd
sys.exit(1)
nfreqmos = 0
nposmos = 0
for indx in range(len(msinfo)):
val = msinfo[indx].strip()
vals = val.split('=');
if (vals[0] == "NFreqMos"):
nfreqmos = int(vals[1])
elif (vals[0] == "NPositions"):
nposmos = int(vals[1])
# print "FM: "+str(nfreqmos)+" PM: "+str(nposmos)
if (nfreqmos == 0 or nposmos == 0):
print "Could not find mosaicing information from MSinfo"
sys.exit(1)
msinfo = {}
msinfo['nposmos'] = nposmos
msinfo['nfreqmos'] = nfreqmos
return msinfo
# Function to correct the Feed table. Arguments are the name of the MS, and
# the table object of the MS (writeable!).
def MSCorrectFeed(ms):
# In case of a combination of X/Y and L/R frontends (e.g, 6 and 13 cm) in a
# single observation, the FEED table contains n*nANT rows with n > 1. But we
# need to create a FEED table with only nAnt entries in the splitted MSes.
# Because the FEEDn columns in the main table refer to a separate column
# FIEED_ID in the FEED table (and not a rownumber!), the FEED table
# rows are not adapted, nor are the FEEDn values in the MAIN table.
# Find the unique values in the FEED1 column in the MAIN table. The input
# is a splitted MS, remember!
feedcolvalues = ms.getcol("FEED1")
uniq_feedcolvalues = list(set(feedcolvalues))
# Get the values from the FEED table column FEED_ID
tmpname = ms.name()+"/FEED"
feedtable = table(tmpname,ack=False)
feed_id = feedtable.getcol("FEED_ID")
# 'match' holds all indexes of the FEED_ID column in the FEED table whose
# values are equal to one of the unique values of FEED1 in MAIN.
# Only those rows with index in 'match' will be kept.
keep_rows = []
for i in range(len(uniq_feedcolvalues)) :
match = findall(feed_id,uniq_feedcolvalues[i])
keep_rows += match
keep_rows = set(keep_rows)
all_rows = set(range(feedtable.nrows()))
# The ^ is an operator that selects all items from all_rows that are
# not present in keep_rows; only works on sets, not on lists. The result
# is a new set (with less entries).
# Logic is: All rows that should not be kept must be removed.
del_rows = list(all_rows ^ keep_rows)
feedtable.removerows(del_rows)
feedtable.close()
# Delete the object to release all filelocks etc. If you do not do this
# copying to this table will result in unpredictable table entries...!
del feedtable
def MSCorrectSpW(ms):
# This function is needed so that the Spectral Window table matches the
# values as present in the DATA_DESCRIPTOR table.
# If there are more lines in the SW-table, than that there are referred to
# in the DATA_DESCRIPTOR table, the ms2uvfits
# converter believes the MS is a multi-IF dataset and writes the
# wrong frequency in the FQ table.
# The function creates a new shorter Spectral_window table and adapts the
# pointers to the entries in the SW-table in the DATA_DESCRIPTOR table.
#
# It must be run after the data descriptor table has been minimized with
# function MSCorrectDataDesc.
# First find the unique values in the SPW_ID column in the DataDesc table.
dd = table(ms.getkeyword('DATA_DESCRIPTION'),readonly=False,ack=False);
dd_spwid = dd.getcol("SPECTRAL_WINDOW_ID");
uniq_ddspwid = list(set(dd_spwid))
# Create a temporary (empty table) copy of the spw table in file tmpspw
spwtable = table(ms.getkeyword('SPECTRAL_WINDOW'),readonly=False,ack=False);
# For each unique SPW_ID value in the DD-table, the associated row
# in the SPECTRAL_WINDOW table is kept. We also adapt the
# values of the SPW_ID in the DD-table to match it with the new rownumber
# (which starts at 0, of course)
keep_rows = []
for i in range(len(uniq_ddspwid)):
match = findall(dd_spwid,uniq_ddspwid[i])
for j in range(len(match)):
dd.putcell("SPECTRAL_WINDOW_ID",match[j],i)
keep_rows += [uniq_ddspwid[i]]
keep_rows = set(keep_rows)
all_rows = set(range(spwtable.nrows()))
# The ^ is an operator that selects all items from all_rows that are
# not present in keep_rows; only works on sets, not on lists. The result
# is a new set (with less entries).
# Logic is: All rows that should not be kept must be removed.
del_rows = list(all_rows ^ keep_rows)
spwtable.removerows(del_rows)
spwtable.close()
# Delete the object to release all filelocks etc. If you do not do this
# copying to this table will result in unpredictable table entries...!
del spwtable
# Clean up
dd.flush()
dd.close()
# Function to reduce the Data_Descriptor table. Arguments are the name of the
# MS, and the table object of the MS (writeable!)
def MSCorrectDataDesc(ms):
# This method is needed to change the DataDescriptor table so it
# contains only the rows referred to from the MS MAIN
# table. The entries DATA_DESC_ID in the MAIN table are adapted
# to point to the new entries (rownrs) in the new data descriptor table.
# This method changes the number of entries in the DD table, but not the
# values in the table (such as SPECTRAL_WINDOW_ID); that will be done later
# (see method MSCorrectSpW).
# Find the unique values in the DATA_DESC_ID column in
# the MAIN table.
ddid = ms.getcol("DATA_DESC_ID")
uniq_ddid = list(set(ddid))
# Open the DATA_DESCRIPTION table of the splitted MS
dd_table = table(ms.getkeyword('DATA_DESCRIPTION'),readonly=False,ack=False);
# Keep the relevant rows, only, from the original table.
# Also adapt the DD_ID column in the MAIN table so the values point to the
# new rownumbers (which start at 0).
new_ddid = ms.getcol("DATA_DESC_ID");
new_row = 0
keep_rows = []
for i in range(len(uniq_ddid)):
match = findall(ddid,uniq_ddid[i])
setvalue(new_ddid,i,match)
keep_rows += [uniq_ddid[i]]
keep_rows = set(keep_rows)
all_rows = set(range(dd_table.nrows()))
# The ^ is an operator that selects all items from all_rows that are
# not present in keep_rows; only works on sets, not on lists. The result
# is a new set (with less entries).
# Logic is: All rows that should not be kept must be removed.
del_rows = list(all_rows ^ keep_rows)
dd_table.removerows(del_rows)
dd_table.close()
# Delete the object to release all filelocks etc. If you do not do this
# copying to this table will result in unpredictable table entries...!
del dd_table
# Write adapted data_descriptor_id values in the DATA_DESC_ID column in the
# MAIN table; these reflect the new rownrs in the adapted DATA_DESCRIPTION
# table.
ms.putcol("DATA_DESC_ID",new_ddid)
# Only flush() the MAIN table for now, but do not close() it; we need it
# later!
ms.flush()
# Function to reduce the SYSCAL table. Arguments are the name of the
# MS, and the table object of the MS (writeable!)
def MSCorrectSyscal(ms):
# The syscal table holds for each antenna, band and integration time the
# system temperatures and such. Therefore it has a columns SPECTRAL_WINDOW_ID,
# to refer in each row to the proper Band. As the SPECTRAL_WINDOW table will
# change, we also must adapt these values in the SYSCAl table. We also remove
# all entries which are not relevant to this MS (i.e., of bands whose data
# is not present in the MAIN table anymore.
# The method must be run after the data descriptor table has been minimized
# with function MSCorrectDataDesc.
# First find the unique values in the SPW_ID column in the DataDesc table.
dd = table(ms.getkeyword('DATA_DESCRIPTION'),readonly=True,ack=False);
dd_spwid = dd.getcol("SPECTRAL_WINDOW_ID");
uniq_ddspwid = list(set(dd_spwid))
# Find out which rows to delete from the syscal table
syscal=table(ms.getkeyword('SYSCAL'),readonly=False,ack=False);
syscal_spwid = syscal.getcol("SPECTRAL_WINDOW_ID");
uniq_syscalspwid = list(set(syscal_spwid))
keep_rows = [] # list that contains rows to keep in syscal
for i in range(len(uniq_ddspwid)):
match = findall(syscal_spwid,uniq_ddspwid[i])
keep_rows += match
keep_rows = set(keep_rows)
all_rows = set(range(syscal.nrows()))
# The ^ is an operator that selects all items from all_rows that are
# not present in keep_rows; only works on sets, not on lists. The result
# is a new set (with less entries).
# Logic is: All rows that should not be kept must be removed.
del_rows = list(all_rows ^ keep_rows)
syscal.removerows(del_rows)
syscal.flush()
# Now re-index the Spectral_window_id column value in the remaining rows.
# These must start from 0 and run upto the remaining number of
# SPECTRAL_WINDOW entries in the MS. As the remaining entries in the SYSCAL
# should be consecutive, we only need to lower these values so they start
# from 0.
syscal_spwid = syscal.getcol("SPECTRAL_WINDOW_ID");
uniq_syscalspwid = list(set(syscal_spwid))
new_spwid = range(len(uniq_syscalspwid))
for i in new_spwid:
match = findall(syscal_spwid,uniq_syscalspwid[i])
setvalue(syscal_spwid,i,match)
syscal.putcol("SPECTRAL_WINDOW_ID",syscal_spwid)
syscal.close()
def MSCorrectSubtables(msname):
ms = table(msname,readonly=False,ack=False)
MSCorrectDataDesc(ms)
MSCorrectSyscal(ms)
MSCorrectFeed(ms)
MSCorrectSpW(ms)
ms.close()
del ms
def MSSplitFreq(msname):
ms = table(msname,readonly=True,ack=False);
ms_spw = table(ms.getkeyword('SPECTRAL_WINDOW'),ack=False);
ddid_col = ms.getcol('DATA_DESC_ID');
uniq_ddid = list(set(ddid_col));
# Find the names of the Spectral_window entries; use these to find
# out the number of Spectral_window 'groups' (a number of rows)
spw_names = ms_spw.getcol('NAME');
uniq_spwnames = list(set(spw_names));
n_uniq_spw = len(uniq_spwnames);
n_spwgroups = len(spw_names)/n_uniq_spw;
rootmsname = msname.split(".")[0]
newNames = []
print 'OK - Try to split',msname,'in Frequency groups';
for i in range(n_spwgroups):
x0 = i*n_uniq_spw;
x1 = (i+1)*n_uniq_spw;
querycmd = 'DATA_DESC_ID in ['+str(x0)+':'+str(x1)+']'
sub_ms = ms.query(querycmd)
# if there is valid data for this frequency mosaic point, create a new
# MS
newName = rootmsname+'_FM'+str(i)+'.MS';
if (sub_ms.nrows() > 0):
print 'Create: '+newName;
sub_ms.copy(newName, deep=True)
# Now open the new MS as writeable, and correct the administration
MSCorrectSubtables(newName)
newNames.append(newName)
else:
print 'Not creating '+newName+' as there are no datapoints for this MS'
ms_spw.close()
ms.close()
return newNames;
def MSSplitPos(msname):
ms = table(msname,ack=False);
# Open the FIELD subtable
ms_field = table(ms.getkeyword('FIELD'),ack=False);
nfields = ms_field.nrows();
rootmsname = msname.split(".")[0]
newNames = []
print 'OK - try to split',msname,'in Positions';
for i in range(nfields):
newName = rootmsname+'_PM'+str(i)+'.MS'
querycmd = "FIELD_ID == " + str(i)
sub_ms = ms.query(querycmd);
if (sub_ms.nrows() > 0):
print 'Create: '+newName;
sub_ms.copy(newName, deep=True);
newNames.append(newName);
ms_field.close()
ms.close()
return newNames
def MSSplitTimeranges(msname,NrofTimeranges):
ms = table(msname,ack=False);
scans = ms.getcol("SCAN_NUMBER")
rowsperscan = ms.nrows()/(scans[ms.nrows()-1] - scans[0])
scansperMS = int(ms.nrows()/(NrofTimeranges*rowsperscan)) + 1
rootmsname = msname.split(".")[0]
newNames = []
print 'OK - Try to split',msname,'in',NrofTimeranges,'parts';
for i in range(NrofTimeranges):
startscan = scans[0] + i*scansperMS # Starts at 1 in the MS
endscan = scans[0] - 1 + (i+1)*scansperMS
if (endscan > scans[ms.nrows()-1]): endscan = scans[ms.nrows()-1]
if (endscan >= startscan):
querycmd = 'SCAN_NUMBER in ['+str(startscan)+':'+str(endscan)+']'
sub_ms = ms.query(querycmd)
# if there is valid data create a new MS
newName = rootmsname+'_T'+str(i)+'.MS';
if (sub_ms.nrows() > 0):
print 'Create: '+newName+' having '+str(endscan-startscan+1)+' samples';
sub_ms.copy(newName, deep=True)
newNames.append(newName)
else:
print 'Not creating '+newName+' as there are no datapoints for this MS'
ms.close()
return newNames;
def cmdparse():
usage = "%prog <MS> [<MS> <MS> <MS> etc.] [n]\n\t- <MS>: (one or more) Measurement Set names is required.\n\t- [n] : (optional) will split the MS(es) in n timeranges."
parser = OptionParser(usage)
(options, args) = parser.parse_args()
return args
# Here starts the main program
if __name__ == "__main__":
args = cmdparse()
if (len(args) == 0):
print " MSsplit.py -h gives help info"
sys.exit(0)
try:
numberMS = int(args[len(args)-1])
except (ValueError, IndexError):
numberMS = None
if (len(args) > 1 and numberMS >= 0):
if (numberMS > 1):
for arg in range(len(args)-1):
msname = args[arg];
# Check presence of MS
if (os.path.isdir(msname) == 0):
print "Could not find MS: "+msname
else:
newFiles = MSSplitTimeranges(msname,numberMS)
else:
print "Not splitting the file into less than 2 timeranges"
else:
for arg in range(len(args)):
msname = args[arg];
# Check presence of MS
if (os.path.isdir(msname) == 0):
print "Could not find MS: "+msname
else:
# Get the info on MS using MSinfo
msinfo = get_msinfo(msname)
# Frequency splitting must be done before position splitting, as the
# ms2uvfits converter cannot handle freq.mos data, but can handle posmos
if (msinfo['nfreqmos'] > 1):
newFiles = MSSplitFreq(msname)
elif(msinfo['nposmos'] > 1):
newFiles = MSSplitPos(msname)
if (len(newFiles) > 0):
print "Done. Created "+str(len(newFiles))+ " MSes."
else:
print "No new files created."
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment