From a63e1f30ce2b2fdbdc2d257433b5149ba31d876a Mon Sep 17 00:00:00 2001 From: Ger van Diepen <diepen@astron.nl> Date: Thu, 22 Mar 2012 10:29:37 +0000 Subject: [PATCH] Task #3054 Added the option to specify the file name patterns on the command line --- CEP/MS/src/expandcalps | 31 ++++++++++++++++-- CEP/MS/src/expandimageps | 14 +++++++- CEP/MS/src/expandtargetps | 21 +++++++++--- CEP/MS/src/mstools.py | 68 +++++++++++++++++++++++++++++++-------- 4 files changed, 113 insertions(+), 21 deletions(-) diff --git a/CEP/MS/src/expandcalps b/CEP/MS/src/expandcalps index dab5eb393bc..9532e10a8be 100755 --- a/CEP/MS/src/expandcalps +++ b/CEP/MS/src/expandcalps @@ -7,9 +7,34 @@ if __name__ == "__main__": if len(sys.argv) < 3: print '' print 'Insufficient arguments; run as:' - print ' expandcalps parsetin parsetout' + print ' expandcalps parsetin parsetout [mspattern [pdbname]]' + print '' + print ' If an mspattern is given, it must be the name of a directory' + print ' containing the MeasurementSets or a glob-pattern matching the' + print ' MeasurementSets to process. If a directory name is given, the' + print ' *.MS will be added to make it a glob pattern.' + print ' If no or an empty mspattern is given, it means that the msin' + print ' parameter in the input parset defines the glob-pattern for' + print ' the MeasurementSets to process.' + print '' + print ' If a pdbname is given, it must define the names of the output' + print ' ParmDBs using placeholders like <BN>.' + print ' If no or an empty pdbname is given, the pdbout parameter in the' + print ' input parset defines the output names' print '' sys.exit(1) + # See if the MS pattern is given. + inname = 'msin' + if len(sys.argv) > 3 and len(sys.argv[3]) > 0: + # Add *.MS if no glob pattern given. Turn it into a list. + inname = [lmt.addfileglob (sys.argv[3], '*.MS')] + # See if the ParmDB name is given. + outname = 'pdbout' + if len(sys.argv) > 4 and len(sys.argv[4]) > 0: + outname = [sys.argv[4]] + # Expand the parset. 
+ print inname, outname + exit lmt.expandps (sys.argv[1], sys.argv[2], - {'in': [['msin', 'Input_Correlated']], - 'out': [['pbout', 'Output_InstrumentModel']]}) + {'in': [(inname, 'Input_Correlated')], + 'out': [(outname, 'Output_InstrumentModel')]}) diff --git a/CEP/MS/src/expandimageps b/CEP/MS/src/expandimageps index 4c501613b10..35d19c7b854 100755 --- a/CEP/MS/src/expandimageps +++ b/CEP/MS/src/expandimageps @@ -7,7 +7,7 @@ if __name__ == "__main__": if len(sys.argv) < 3: print '' print 'Insufficient arguments; run as:' - print ' expandimageps parsetin parsetout [nsubbands] [nodeindex]' + print ' expandimageps parsetin parsetout [nsubbands [nodeindex]]' print ' nsubbands number of subbands in a single image' print ' default = 10' print ' nodeindex subband whose node to use to run the imaging pipeline on' @@ -20,6 +20,18 @@ if __name__ == "__main__": nsubbands = int(sys.argv[3]) if len(sys.argv) > 4: nodeindex = int(sys.argv[4]) + # See if the MS pattern is given. + msname = 'msin' + if len(sys.argv) > 3 and len(sys.argv[3]) > 0: + msname = [sys.argv[3]] + # See if the ParmDB pattern is given. + pdbname = 'pdbin' + if len(sys.argv) > 4 and len(sys.argv[4]) > 0: + pdbname = [sys.argv[4]] + # See if the MS output name is given. + outname = 'msout' + if len(sys.argv) > 5 and len(sys.argv[5]) > 0: + outname = [sys.argv[5]] lmt.expandps (sys.argv[1], sys.argv[2], {'in': [['msin', 'Input_Correlated']], 'out': [['imgout', 'Output_SkyImage']]}, diff --git a/CEP/MS/src/expandtargetps b/CEP/MS/src/expandtargetps index 9e6b7f221d5..0e87ba87bbc 100755 --- a/CEP/MS/src/expandtargetps +++ b/CEP/MS/src/expandtargetps @@ -7,10 +7,23 @@ if __name__ == "__main__": if len(sys.argv) < 3: print '' print 'Insufficient arguments; run as:' - print ' expandtargetps parsetin parsetout' + print ' expandtargetps parsetin parsetout [mspattern [pdbpattern [outname]]]' print '' sys.exit(1) + # See if the MS pattern is given. 
+ msname = 'msin' + if len(sys.argv) > 3 and len(sys.argv[3]) > 0: + msname = [sys.argv[3]] + # See if the ParmDB pattern is given. + pdbname = 'pdbin' + if len(sys.argv) > 4 and len(sys.argv[4]) > 0: + pdbname = [sys.argv[4]] + # See if the MS output name is given. + outname = 'msout' + if len(sys.argv) > 5 and len(sys.argv[5]) > 0: + outname = [sys.argv[5]] + # Expand the parset. lmt.expandps (sys.argv[1], sys.argv[2], - {'in': [['msin', 'Input_Correlated'], - ['pbin', 'Input_InstrumentModel']], - 'out': [['msout', 'Output_Correlated']]}) + {'in': [[msname, 'Input_Correlated'], + [pdbname, 'Input_InstrumentModel']], + 'out': [[outname, 'Output_Correlated']]}) diff --git a/CEP/MS/src/mstools.py b/CEP/MS/src/mstools.py index 71a42104e00..b249d3392b7 100644 --- a/CEP/MS/src/mstools.py +++ b/CEP/MS/src/mstools.py @@ -142,6 +142,28 @@ def movemss (srcPattern, dstPattern, userName, bandsPerBeam=80, dryrun=False): os.system (cmd) print nInPlace, "source files are already on the correct destination mode" +def addfileglob (filename, pattern): + """ If needed, append '/' and the glob pattern to the filename + + The pattern is appended if the filename ends in a slash or if its basename + has none of the glob characters *, ?, [ or {; else it is returned as is. + + """ + hasglob = False + if filename[-1] == '/': + filename = filename[:-1] + else: + import os + bname = os.path.basename(filename) + hasglob = False + for c in '*?[{': + if c in bname: + hasglob = True + break + if hasglob: + return filename + return filename + '/' + pattern + def expandps (parsetin, parsetout, keymap, nsubbands=0, nodeindex=0): """ Expand dataset names in a parset file @@ -166,15 +188,21 @@ def expandps (parsetin, parsetout, keymap, nsubbands=0, nodeindex=0): The parameter in the input parset defines one or more filename glob patterns. Usually one pattern will be used, but multiple are needed for e.g. the imaging pipeline (a pattern per observation slice). 
+ Instead of a parameter name, it is also possible to give a + list of glob patterns directly. Thus passing a string means a parameter + name, while a list means glob patterns. | 'out' maps to a list of pairs. Each pair defines the names of the parameter in the input and output parset. The parameter value in the - input parset can contain the following cexecms-like place holders: + input parset must define the location of the output. It can contain + the following cexecms-like placeholders: | <DN> is the directory name of the input dataset | <BN> is the basename of the input dataset | <BN.> is the basename till the first dot (thus without the extension) | <.BN> is the basename after the first dot (thus the extension) | <SEQ> is a 3 digit sequence number (000, 001, ...) useful for the imaging pipeline. + Instead of an input parameter name, it is possible to directly give + the output location by passing it as a list containing one element. nsubbands If > 0, the number of subbands in a subband group (i.e., the number of subbands to combine in an image). @@ -238,10 +266,10 @@ def expandps (parsetin, parsetout, keymap, nsubbands=0, nodeindex=0): # Open parset and get all keywords. ps = lofar.parameterset.parameterset (parsetin) pskeys = ps.keys() - # Write nsubbands parameter if given; otherwise set to 1. - if nsubbands > 0: - ps.add ('nsubbands', str(nsubbands)); - else: + # See if nsubbands parameter is given; otherwise set to 1. + havesubbands = True + if nsubbands <= 0: + havesubbands = False nsubbands = 1 # Check and initialize. 
if nodeindex < 0 or nodeindex >= nsubbands: @@ -255,11 +283,15 @@ def expandps (parsetin, parsetout, keymap, nsubbands=0, nodeindex=0): inkeys = keymap["in"] nrproc = 1 for (keyin,keyout) in inkeys: - # Find keyin in the parset - if keyin not in pskeys: - raise KeyError, "keyword " + keyin + " not found in parset " + parsetin - # Get the file name patterns/ - patterns = ps.getStringVector(keyin) + # If a string, find keyin in the parset. + # Otherwise it defines the glob patterns. + if isinstance(keyin, str): + if keyin not in pskeys: + raise KeyError, "keyword " + keyin + " not found in parset " + parsetin + # Get the file name patterns. + patterns = ps.getStringVector(keyin) + else: + patterns = keyin locs = [] names = [] for patt in patterns: @@ -285,6 +317,11 @@ def expandps (parsetin, parsetout, keymap, nsubbands=0, nodeindex=0): ps.add (newkey + '.filenames', str(names)); ps.remove (keyin) + # Write nsubbands if needed. + if havesubbands: + ps.add ('subbands_per_image', str(nsubbands)); + ps.add ('slices_per_image', str(nslice)); + # Process output keywords if they are present. if 'out' in keymap: if len(filenames) == 0: @@ -292,9 +329,14 @@ def expandps (parsetin, parsetout, keymap, nsubbands=0, nodeindex=0): inkeys = keymap["out"] nrproc += 1 for (keyin,keyout) in inkeys: - if keyin not in pskeys: - raise KeyError, "keyword " + keyin + " not found in parset " + parsetin - name = ps.getString(keyin) + if isinstance(keyin, str): + if keyin not in pskeys: + raise KeyError, "keyword " + keyin + " not found in parset " + parsetin + name = ps.getString(keyin) + else: + if len(keyin) != 1: + raise KeyError, "Output key " + str(keyin) + " is not a string, thus should be a sequence of length 1" + name = keyin[0]; locs = [] names = [] # Create output for all input names replacing tags like <BN>. -- GitLab