diff --git a/lib/constants_generator.py b/lib/constants_generator.py
index 16d7242d63fede4531dd40946c86f779f5eb4b94..d5b055493b9c7004cda8cc734c52576597c84108 100755
--- a/lib/constants_generator.py
+++ b/lib/constants_generator.py
@@ -77,7 +77,7 @@ def main(path):
             else:
                 value = str(value)
             line = key+"="+value+"\n"
-            logger.info(line, end=' ')
+            logger.info(line)
             f.write(line)
diff --git a/lib/feedback.py b/lib/feedback.py
index 47cfa3459fa0ad88baff43c2a4b6aaae4622bb50..dc1a08be74f5c19c2c38693b785def1c5bc21108 100644
--- a/lib/feedback.py
+++ b/lib/feedback.py
@@ -45,7 +45,7 @@ class Feedback():
     def __init__(self, feedback):
         self.__inputstrings = feedback
         self.__tree = {}
-        logger.info("parsing",len(feedback),"lines of feedback")
+        logger.info("parsing %s lines of feedback" % len(feedback))
         for line in feedback:
             if line.strip() and not line.startswith("#"):
                 try:
@@ -60,7 +60,7 @@ class Feedback():
                         except:
                             t[key.split('.')[-1]] = value.strip()
                 except:
-                    logger.info("Skipping line:", line)
+                    logger.info("Skipping line: %s" % line)

         # Now self.__tree holds nested dicts according to the dot-encoded key hierarchy
         #pprint.pprint(self.__tree)
@@ -107,7 +107,7 @@ class Feedback():
         dps = [(k, dp) for (k, dp) in list(dps.items()) if k.startswith("Output_")]

         for k, dp in dps:
-            logger.info("Parsing",k,"...")
+            logger.info("Parsing %s..." % k)

             # correct timestamp format
             startt=dp.get("startTime")
@@ -169,7 +169,7 @@ class Feedback():
             if elem.get(prefix):
                 elem = elem.get(prefix)
             else:
-                logger.info("provided prefix seems to be wrong: '"+prefix+"' not in", list(elem.keys()))
+                logger.info("provided prefix seems to be wrong: %s not in %s" % (prefix, list(elem.keys())))
         return elem
@@ -305,8 +305,8 @@ class Feedback():
         for dataproduct in self.get_dataproducts(prefix=dp_prefix):
             filename = None
             try:
-                filename = dataproduct.get_pyxb_dataproduct().fileName
-                logger.info("Creating SIP for", filename)
+                filename = dataproduct._get_pyxb_dataproduct().fileName
+                logger.info("Creating SIP for %s" % filename)

                 # create SIP document for dataproduct
                 sip = self.__get_basic_sip(dataproduct)
@@ -355,7 +355,7 @@ class Feedback():
             except Exception as err:
                 if not filename:
                     filename = "UNDEFINED"
-                logger.info("Could not create SIP for", filename,"->",err)
+                logger.info("Could not create SIP for %s -> %s" % (filename,err))

         if sips:
             return sips
@@ -364,7 +364,7 @@ def example(fil):

-    logger.info("Now running example on file", fil)
+    logger.info("Now running example on file %s" % fil)
     with open(fil) as f:
         text = f.readlines()
@@ -373,7 +373,7 @@ def example(fil):
     # A) Parse complete SIP:
     sips = feedback.get_dataproduct_sips(obs_prefix="ObsSW.Observation", dp_prefix="Observation.DataProducts")
     for key in list(sips.keys()):
-        logger.info("Created SIP for file "+ str(key))
+        logger.info("Created SIP for file %s" %key)

     # B) Alternatively: Parse dataproducts from pseudo-feedback (specialty of Leiden group):
diff --git a/lib/query.py b/lib/query.py
index 8f6788350da34e3c972905bd25b75cd3117eed4c..f1eae290b73fe890c9d7c25ba7f21c7ce560e1b6 100644
--- a/lib/query.py
+++ b/lib/query.py
@@ -25,7 +25,7 @@ if not exists(path):
         file.write("host=\n")

 with open(path,'r') as file:
-    logger.info("Parsing user credentials from",path)
+    logger.info("Parsing user credentials from %s" % path)
     for line in file:
         if line.startswith("user"):
             user = line.split('=')[1].strip()
diff --git a/lib/validator.py b/lib/validator.py
index 9b5d2688ef20cb495afcc4e58d8756d8f4a0cc37..0825f15997787283650a28886ed9e4e735d7a23e 100644
--- a/lib/validator.py
+++ b/lib/validator.py
@@ -13,7 +13,7 @@ DEFAULT_SIP_XSD_PATH = os.path.join(os.environ.get('LOFARROOT', '/opt/lofar'), '

 def validate(xmlpath, xsdpath=DEFAULT_SIP_XSD_PATH):
     '''validates given xml file against given xsd file'''
-    logger.info("validating", xmlpath, "against", xsdpath)
+    logger.info("validating %s against %s" % (xmlpath, xsdpath))

     with open(xsdpath) as xsd:
         xmlschema_doc = etree.parse(xsd)
@@ -33,71 +33,81 @@ def validate(xmlpath, xsdpath=DEFAULT_SIP_XSD_PATH):

     return valid


-def check_consistency(xmlpath):
+def check_consistency(sip):
+    '''
+    since a SIP can be valid XML but make no sense structurally, this makes sure that SIP contents contain a tree
+    structure that ends with a single described dataproduct, and all pipelines and intermediary dataproducts are
+    properly chained together.
+    '''
+
+    sip = sip._get_pyxb_sip(suppress_warning=True)
+    linkstodataproduct = {}
+    linkstoprocess = {}
+
+    # the dataproduct that is described by the sip
+    data_out = sip.dataProduct
+    id_out = str(data_out.dataProductIdentifier.identifier)
+    id_process = str(data_out.processIdentifier.identifier)
+    linkstodataproduct.setdefault(id_out, []).append(id_process)
+
+    # the input / intermediate dataproducts
+    for data_in in sip.relatedDataProduct:
+        id_in = str(data_in.dataProductIdentifier.identifier)
+        id_process = str(data_in.processIdentifier.identifier)
+        linkstodataproduct.setdefault(id_in, []).append(id_process)
+
+    # the observations
+    for obs in sip.observation:
+        id_obs = str(obs.observationId.identifier)
+        id_process = str(obs.processIdentifier.identifier)
+        linkstoprocess.setdefault(id_process, [])
+
+    # the data processing steps
+    for pipe in sip.pipelineRun:
+        id_pipe = str(pipe.processIdentifier.identifier)
+        id_in = []
+        for elem in pipe.sourceData.orderedContent():
+            id_in.append(str(elem.value.identifier))
+        linkstoprocess.setdefault(id_pipe, []).append(id_in)
+
+    # the data processing steps
+    for unspec in sip.unspecifiedProcess:
+        id_unspec = str(unspec.processIdentifier.identifier)
+        linkstoprocess.setdefault(id_unspec, [])
+
+    # todo: online processing
+    # todo: parsets (?)
+
+    for id in linkstodataproduct:
+        for id_from in linkstodataproduct.get(id):
+            if not id_from in linkstoprocess:
+                raise Exception(
+                    "The pipeline or observation that created dataproduct '" + id + "' seems to be missing! -> ",
+                    id_from)
+
+    for id in linkstoprocess:
+        for ids_from in linkstoprocess.get(id):
+            for id_from in ids_from:
+                if not id_from in linkstodataproduct:
+                    raise Exception("The input dataproduct for pipeline '" + id + "' seems to be missing! -> ", id_from)
+
+    logger.info("General SIP structure seems ok!")
+    return True # already raised Exception if there was a problem...
+
+
+def check_consistency_of_file(xmlpath):
     """
     Checks the general structure of the provided SIP XML. E.g.:
     Is/Are the processes/es present that created the described dataproduct / related dataproducts?
     Are the input dataproducts for these processes present?
""" - logger.info("Checking", xmlpath, "for structural consistency") + logger.info("Checking %s for structural consistency" % xmlpath) with open(xmlpath) as f: xml = f.read() - sip = ltasip.CreateFromDocument(xml) - - - linkstodataproduct = {} - linkstoprocess = {} - - # the dataproduct that is described by the sip - data_out = sip.dataProduct - id_out = str(data_out.dataProductIdentifier.identifier) - id_process = str(data_out.processIdentifier.identifier) - linkstodataproduct.setdefault(id_out,[]).append(id_process) - - # the input / intermediate dataproducts - for data_in in sip.relatedDataProduct: - id_in = str(data_in.dataProductIdentifier.identifier) - id_process = str(data_in.processIdentifier.identifier) - linkstodataproduct.setdefault(id_in,[]).append(id_process) - - # the observations - for obs in sip.observation: - id_obs = str(obs.observationId.identifier) - id_process = str(obs.processIdentifier.identifier) - linkstoprocess.setdefault(id_process,[]) - - # the data processing steps - for pipe in sip.pipelineRun: - id_pipe = str(pipe.processIdentifier.identifier) - id_in = [] - for elem in pipe.sourceData.orderedContent(): - id_in.append(str(elem.value.identifier)) - linkstoprocess.setdefault(id_pipe,[]).append(id_in) - - # the data processing steps - for unspec in sip.unspecifiedProcess: - id_unspec = str(unspec.processIdentifier.identifier) - linkstoprocess.setdefault(id_unspec,[]) - - - # todo: online processing - # todo: parsets (?) - - for id in linkstodataproduct: - for id_from in linkstodataproduct.get(id): - if not id_from in linkstoprocess: - raise Exception("The pipeline or observation that created dataproduct '"+ id + "' seems to be missing! -> ", id_from) - - for id in linkstoprocess: - for ids_from in linkstoprocess.get(id): - for id_from in ids_from: - if not id_from in linkstodataproduct: - raise Exception("The input dataproduct for pipeline '"+ id +"' seems to be missing! -> ", id_from) - - logger.info("General SIP structure seems ok!") - return True # already raised Exception if there was a problem... 
+        sip = siplib.Sip.fromxml(xml)
+        return check_consistency(sip)


 def main(xml):
@@ -109,7 +119,7 @@ def main(xml):
         xml = xml
         xsd = DEFAULT_SIP_XSD_PATH
         valid = validate(xml, xsd)
-        consistent = check_consistency(xml)
+        consistent = check_consistency_of_file(xml)
         return valid and consistent
     except Exception as err:
         logger.error(err)
diff --git a/lib/visualizer.py b/lib/visualizer.py
index ca41fe0b2fe476b016da6dc0e324b13c274ece9a..225016ef8629c82952fe105eee966782181d03fa 100755
--- a/lib/visualizer.py
+++ b/lib/visualizer.py
@@ -47,7 +47,7 @@ def visualize_sip(sip, path=None, format="svg", view=False):
     data_out = sip.dataProduct
     id_out = str(data_out.dataProductIdentifier.identifier)
     dot.node(id_out, id_out +": "+data_out.fileName,style="filled",fillcolor="cadetblue", shape="note")
-    logger.info("adding node for final dataproduct ", id_out)
+    logger.info("adding node for final dataproduct %s" % id_out)
     id_process = str(data_out.processIdentifier.identifier)
     # keep reference to originating pipeline run / observation:
     linkstodataproduct.setdefault(id_out,[]).append(id_process)
@@ -56,7 +56,7 @@ def visualize_sip(sip, path=None, format="svg", view=False):
     for data_in in sip.relatedDataProduct:
         id_in = str(data_in.dataProductIdentifier.identifier)
         dot.node(id_in, id_in +": "+data_in.fileName, style="filled", shape="note",fillcolor="cadetblue2")
-        logger.info("adding node for dataproduct ", id_in)
+        logger.info("adding node for dataproduct %s" % id_in)
         id_process = str(data_in.processIdentifier.identifier)
         # keep reference to originating pipeline run / observation:
         linkstodataproduct.setdefault(id_in,[]).append(id_process)
@@ -66,7 +66,7 @@ def visualize_sip(sip, path=None, format="svg", view=False):
         id_obs = str(obs.observationId.identifier)
         id_process = str(obs.processIdentifier.identifier)
         dot.node(id_process, id_process + ": "+ id_obs, style="filled", fillcolor="gold",shape="octagon")
-        logger.info("adding node for observation ", id_process)
+        logger.info("adding node for observation %s" % id_process)
        # no incoming data here, but register node as present:
         linkstoprocess.setdefault(id_process,[])
@@ -74,7 +74,7 @@ def visualize_sip(sip, path=None, format="svg", view=False):
     for pipe in sip.pipelineRun:
         id_pipe = str(pipe.processIdentifier.identifier)
         dot.node(id_pipe, id_pipe+" ", style="filled", fillcolor="chartreuse", shape="cds")
-        logger.info("adding node for pipelinerun ", id_pipe)
+        logger.info("adding node for pipelinerun %s" % id_pipe)
         # keep reference to input dataproducts:
         id_in = []
         for elem in pipe.sourceData.orderedContent():
@@ -85,7 +85,7 @@ def visualize_sip(sip, path=None, format="svg", view=False):
     for unspec in sip.unspecifiedProcess:
         id_unspec = str(unspec.processIdentifier.identifier)
         dot.node(id_unspec, id_unspec, style="filled", fillcolor="orange", shape="hexagon")
-        logger.info("adding node for unspecified process ", id_unspec)
+        logger.info("adding node for unspecified process %s" % id_unspec)
         # no incoming data here, but register node as present:
         linkstoprocess.setdefault(id_unspec,[])
@@ -165,7 +165,7 @@ def stylize(graph):


 def main(xmlpath):
-    logger.info("Reading xml from file", xmlpath)
+    logger.info("Reading xml from file %s" % xmlpath)
     with open(xmlpath) as f:
         xml = f.read()
         sip = ltasip.CreateFromDocument(xml)
diff --git a/test/test_validator.py b/test/test_validator.py
index ba55e2543be81ecf8ed0bf8339f76f15f664a71e..7becc673afb4bf68b199d472c3e5b0de89b0572e 100755
--- a/test/test_validator.py
+++ b/test/test_validator.py
@@ -34,7 +34,7 @@ VALIDFILE_PATH = "valid_sip.xml"
"valid_sip.xml" class TestSIPvalidator(unittest.TestCase): def test_validate(self): self.assertTrue(validator.validate(VALIDFILE_PATH)) - self.assertTrue(validator.check_consistency(VALIDFILE_PATH)) + self.assertTrue(validator.check_consistency_of_file(VALIDFILE_PATH)) self.assertTrue(validator.main(VALIDFILE_PATH)) # run tests if main