Skip to content
Snippets Groups Projects
Select Git revision
  • 4916fcf7ac75bbf4dc2fba4c3552b081b1034110
  • master default protected
  • MAM-110-propagate-output-sasid
  • MAM-109-specify-ingest-location
  • master-backup-september-2024
5 results

algorithms.py

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    visualize.py 5.16 KiB
    #!/usr/bin/python
    
    from graphviz import Digraph
    import sys
    import siplib
    import ltasip
    
    def visualize_sip(sip):
        """Render the provenance graph of a SIP with graphviz and open the result.

        Builds a legend cluster and a cluster containing the described
        dataproduct, the related dataproducts, the observations, the pipeline
        runs and the unspecified processes of the SIP. Each dataproduct is linked
        to the process that produced it, and each pipeline run to its source
        dataproducts. The combined graph is styled with stylize() and rendered
        to 'sip.gv' with the viewer opened.

        References to processes or dataproducts that are not described in the
        SIP are reported on stdout and no edge is drawn for them.

        :param sip: object exposing project, dataProduct, relatedDataProduct,
                    observation, pipelineRun and unspecifiedProcess
                    (main() passes the result of ltasip.CreateFromDocument).
        """
        # dataproduct id -> ids of the processes that produced it
        linkstodataproduct = {}
        # process id -> ids of the dataproducts it used as input
        linkstoprocess = {}

        dot_wrapper = Digraph('cluster_wrapper')

        # ---
        # create legend
        # graphviz only renders subgraph styles when the name starts with 'cluster'
        dot_legend = Digraph('cluster_legend')
        dot_legend.body.append('style=filled')
        dot_legend.body.append('bgcolor=lightgrey')
        dot_legend.body.append('label="Legend\n\n"')

        dot_legend.node('A', "Described Dataproduct", style="filled", fillcolor="cadetblue", shape="note")
        dot_legend.node('B', "Related Dataproduct", style="filled", fillcolor="cadetblue2", shape="note")
        dot_legend.node('C', "Observation", style="filled", fillcolor="gold", shape="octagon")
        dot_legend.node('D', "Pipeline/Process ", style="filled", fillcolor="chartreuse", shape="cds")
        dot_legend.node('E', "Unspec. Process", style="filled", fillcolor="orange", shape="hexagon")
        # invisible edges only exist to stack the legend entries in order
        dot_legend.edge('A', 'B', color="invis")
        dot_legend.edge('B', 'C', color="invis")
        dot_legend.edge('C', 'D', color="invis")
        dot_legend.edge('D', 'E', color="invis")

        # ---
        # create the actual sip graph
        dot = Digraph('cluster_sip')
        dot.body.append('style=filled')
        dot.body.append('bgcolor=lightgrey')
        dot.body.append('label = "'+str(sip.project.projectCode+" - "+sip.project.projectDescription)+'\n\n"')

        # the dataproduct that is described by the sip
        data_out = sip.dataProduct
        id_out = data_out.dataProductIdentifier.identifier
        dot.node(id_out, id_out + ": " + data_out.fileName, style="filled", fillcolor="cadetblue", shape="note")
        # keep reference to originating pipeline run / observation:
        linkstodataproduct.setdefault(id_out, []).append(data_out.processIdentifier.identifier)

        # the input / intermediate dataproducts
        for data_in in sip.relatedDataProduct:
            id_in = data_in.dataProductIdentifier.identifier
            dot.node(id_in, id_in + ": " + data_in.fileName, style="filled", shape="note", fillcolor="cadetblue2")
            # keep reference to originating pipeline run / observation:
            linkstodataproduct.setdefault(id_in, []).append(data_in.processIdentifier.identifier)

        # the observations
        for obs in sip.observation:
            id_obs = obs.observationId.identifier
            id_process = obs.processIdentifier.identifier
            dot.node(id_obs, id_obs + ": " + id_process, style="filled", fillcolor="gold", shape="octagon")
            # no incoming data here, but register node as present:
            linkstoprocess.setdefault(id_obs, [])

        # the pipeline runs
        for pipe in sip.pipelineRun:
            id_pipe = pipe.processIdentifier.identifier
            dot.node(id_pipe, id_pipe + " ", style="filled", fillcolor="chartreuse", shape="cds")
            # keep reference to input dataproducts; extend (not append) so the
            # stored list stays flat -- a nested list would later be used as a
            # dict key and fail with "unhashable type: 'list'"
            source_ids = [source.identifier for source in pipe.sourceData]
            linkstoprocess.setdefault(id_pipe, []).extend(source_ids)

        # the unspecified processes
        for unspec in sip.unspecifiedProcess:
            id_unspec = unspec.processIdentifier.identifier
            dot.node(id_unspec, id_unspec, style="filled", fillcolor="orange", shape="hexagon")
            # no incoming data here, but register node as present:
            linkstoprocess.setdefault(id_unspec, [])

        # todo: online processing
        # todo: parsets (?)

        # single-argument print() produces the same output on Python 2 and 3
        print(linkstoprocess)
        print(linkstodataproduct)

        # add edges: producing process -> dataproduct
        for id_product, producer_ids in linkstodataproduct.items():
            for id_from in producer_ids:
                if id_from in linkstoprocess:
                    dot.edge(id_from, id_product)
                else:
                    print("Error: The pipeline or observation that created dataproduct '%s' seems to be missing! ->  %s"
                          % (id_product, id_from))

        # add edges: input dataproduct -> consuming process
        for id_proc, input_ids in linkstoprocess.items():
            for id_from in input_ids:
                if id_from in linkstodataproduct:
                    dot.edge(id_from, id_proc)
                else:
                    print("Error: The input dataproduct for pipeline '%s' seems to be missing! ->  %s"
                          % (id_proc, id_from))

        # ----
        # render graph:
        dot_wrapper.subgraph(dot_legend)
        dot_wrapper.subgraph(dot)
        dot_wrapper = stylize(dot_wrapper)
        dot_wrapper.render('sip.gv', view=True)
    
    
    
    
    
    
    def stylize(graph):
        """Apply the default fonts and colours to a graph and return it.

        Updates graph.graph_attr, graph.node_attr and graph.edge_attr in place;
        attributes that are not listed below are left untouched.

        :param graph: object exposing dict-like graph_attr, node_attr and
                      edge_attr (e.g. a graphviz Digraph)
        :return: the same graph object that was passed in
        """
        graph_defaults = {
            'fontname': 'Helvetica',
            'fontsize': '18',
            'fontcolor': 'grey8',
            'bgcolor': 'grey90',
            'rankdir': 'TB',
        }
        node_defaults = {
            'fontname': 'Helvetica',
            'fontcolor': 'grey8',
            'color': 'grey8',
        }
        edge_defaults = {
            'arrowhead': 'open',
            'fontname': 'Courier',
            'fontsize': '12',
            'fontcolor': 'grey8',
        }

        graph.graph_attr.update(graph_defaults)
        graph.node_attr.update(node_defaults)
        graph.edge_attr.update(edge_defaults)
        return graph
    
    
    
    
    def main(argv):
        """Read a SIP XML document from the file named in argv[1] and visualize it.

        :param argv: argument list in sys.argv form; argv[0] is the program
                     name and argv[1] the path of the SIP XML file
        :raises SystemExit: with a usage message when no file path is given
        """
        if len(argv) < 2:
            # exit with a readable hint instead of an IndexError traceback
            program = argv[0] if argv else 'visualize.py'
            sys.exit("Usage: %s <sip.xml>" % program)

        path = argv[1]
        # single-argument print() produces the same output on Python 2 and 3
        print("Reading xml from file %s" % path)
        with open(path) as f:
            xml = f.read()
        sip = ltasip.CreateFromDocument(xml)
        visualize_sip(sip)
    
    
    # Script entry point: pass the full command line (program name included) to main().
    if __name__ == '__main__':
        main(sys.argv)