diff --git a/graph_creation/cwl_processing.py b/graph_creation/cwl_processing.py index 34c4f490f1cce09e5367b13118108168fb46ccc6..e5b2dc58128729e9348054d7ffbdc36497e851bb 100644 --- a/graph_creation/cwl_processing.py +++ b/graph_creation/cwl_processing.py @@ -1,6 +1,6 @@ from neo4j import Driver from graph_creation.cst_processing import traverse_and_create, traverse_when_statement_extract_dependencies -from graph_creation.utils import create_input_nodes_and_relationships, create_main_input_nodes_and_relationships, create_right_control_relationship, create_right_data_relationship, process_source_relationship, resolve_relative_path +from graph_creation.utils import create_input_nodes_and_relationships, create_main_input_nodes_and_relationships, create_right_control_relationship, create_right_data_relationship, get_pipelines, is_relevant_pipeline, process_source_relationship, resolve_relative_path from neo4j_queries.node_queries import ensure_component_node, ensure_data_node, ensure_in_parameter_node, ensure_main_component_node, ensure_main_in_parameter_node, ensure_main_out_parameter_node, ensure_out_parameter_node from neo4j_queries.edge_queries import create_data_relationship, create_main_out_param_relationship, create_out_param_relationship from pathlib import Path @@ -167,22 +167,24 @@ def process_cwl_steps(driver: Driver, cwl_entity: dict) -> None: workflow_folder = Path(cwl_entity['path']).parent full_step_path = workflow_folder / Path(step['run']) step_path = str(resolve_relative_path(full_step_path)) + relevant_pipeline = is_relevant_pipeline(step_path) # Create the step component node with ID equal to the step - if "compress_pipeline.cwl" not in step_path: + pipelines = get_pipelines() + if not relevant_pipeline: s_node = ensure_component_node(driver, step_path) s_node_internal_id = s_node[0] # Process the list of inputs of the step for input in step['in']: # Create in-parameter node with ID as defined in the component and component ID equal to the path of the step - if "compress_pipeline.cwl" in step_path: + if relevant_pipeline: param_node = ensure_main_in_parameter_node(driver, input['id'], step_path) else: param_node = ensure_in_parameter_node(driver, input['id'], step_path) param_node_internal_id = param_node[0] # Create a data edge from the step component node to the in-parameter node - if "compress_pipeline.cwl" not in step_path: + if not relevant_pipeline: create_data_relationship(driver, s_node_internal_id, param_node_internal_id) # Inputs can have one or multiple data sources (data nodes) @@ -205,7 +207,7 @@ def process_cwl_steps(driver: Driver, cwl_entity: dict) -> None: for ref in when_refs: ref_id = ref[1] if ref[0] == "parameter": - if "compress_pipeline.cwl" in step_path: + if any(pipeline == Path(step_path).stem for pipeline in pipelines): input_data = ensure_main_in_parameter_node(driver, ref_id, step_path)[0] else: input_data = ensure_in_parameter_node(driver, ref_id, step_path)[0] @@ -225,13 +227,13 @@ def process_cwl_steps(driver: Driver, cwl_entity: dict) -> None: else: output_id = output # Create out-parameter node with ID as defined in the component and component ID equal to the path of the step - if "compress_pipeline.cwl" in step_path: + if relevant_pipeline: param_node = ensure_main_out_parameter_node(driver, output_id, step_path) else: param_node = ensure_out_parameter_node(driver, output_id, step_path) param_node_internal_id = param_node[0] # Create a data edge from out-parameter node to the step component node - if "compress_pipeline.cwl" not in step_path: + if not relevant_pipeline: create_data_relationship(driver, param_node_internal_id, s_node_internal_id) # Create data node with id equal to step_id/output_id and component ID equal to the path of the outer workflow outer_output_id = f"{step['id']}/{output_id}" diff --git a/graph_creation/repo_processing.py b/graph_creation/repo_processing.py index d430708c8ab1a9abbd1385932f97e48fa16960d9..b4eb4a73c3abbcd31b09c37fd9a55568eb458756 100644 --- a/graph_creation/repo_processing.py +++ b/graph_creation/repo_processing.py @@ -1,6 +1,8 @@ +from pathlib import Path from neo4j import Driver from graph_creation.cwl_parsing import get_cwl_from_repo from graph_creation.cwl_processing import process_cwl_expression, process_cwl_inputs, process_cwl_main_inputs, process_cwl_main_outputs, process_cwl_outputs, process_cwl_steps +from graph_creation.utils import get_pipelines from neo4j_queries.edge_queries import simplify_data_and_control_edges from neo4j_queries.node_queries import ensure_component_node, ensure_main_component_node @@ -18,9 +20,10 @@ def process_repos(repo_list: list[str], driver: Driver) -> None: # Parse CWL files cwl_entities[repo]= get_cwl_from_repo(repo) for entity in cwl_entities[repo]: - pipelines = ["compress_pipeline.cwl"] - if any(pipeline in entity['path'] for pipeline in pipelines): - component_id = entity['path'] + nice_id = Path(entity['path']).stem + pipelines = get_pipelines() + if any(pipeline == nice_id for pipeline in pipelines): + # component_id = entity['path'] # ensure_main_component_node(driver, component_id) process_cwl_main_inputs(driver, entity) process_cwl_main_outputs(driver, entity) diff --git a/graph_creation/utils.py b/graph_creation/utils.py index f06046d03fd5cb72145c3cbbd205775d63a404f4..ca7799582100292e6540511bf3676e204e7fedf9 100644 --- a/graph_creation/utils.py +++ b/graph_creation/utils.py @@ -1,14 +1,14 @@ from pathlib import Path from neo4j import Driver from neo4j_queries.node_queries import ensure_data_node, ensure_in_parameter_node, ensure_main_in_parameter_node -from neo4j_queries.edge_queries import create_compress_control_relationship, create_compress_data_relationship, create_control_relationship, create_data_relationship, create_download_control_relationship, create_download_data_relationship, create_in_param_relationship, create_main_in_param_relationship +from neo4j_queries.edge_queries import create_compress_control_relationship, create_compress_data_relationship, create_control_relationship, create_data_relationship, create_inspect_control_relationship, create_inspect_data_relationship, create_in_param_relationship def create_right_data_relationship(driver: Driver, node_internal_id_1: int, node_internal_id_2: int, nice_id: str) -> None: if "compress_pipeline" == nice_id: create_compress_data_relationship(driver, node_internal_id_1, node_internal_id_2) - elif "download_and_compress_pipeline" == nice_id: - create_download_data_relationship(driver, node_internal_id_1, node_internal_id_2) + elif "inspect_workflow" == nice_id: + create_inspect_data_relationship(driver, node_internal_id_1, node_internal_id_2) else: create_data_relationship(driver, node_internal_id_1, node_internal_id_2) @@ -16,8 +16,8 @@ def create_right_control_relationship(driver: Driver, node_internal_id_1: int, n nice_id = Path(component_id).stem if "compress_pipeline" == nice_id: create_compress_control_relationship(driver, node_internal_id_1, node_internal_id_2, component_id) - elif "download_and_compress_pipeline" == nice_id: - create_download_control_relationship(driver, node_internal_id_1, node_internal_id_2, component_id) + elif "inspect_workflow" == nice_id: + create_inspect_control_relationship(driver, node_internal_id_1, node_internal_id_2, component_id) else: create_control_relationship(driver, node_internal_id_1, node_internal_id_2, component_id) @@ -119,4 +119,11 @@ def resolve_relative_path(path: Path)-> Path: parts.pop() elif part != ".": parts.append(part) - return Path(*parts) \ No newline at end of file + return Path(*parts) + +def get_pipelines() -> list[str]: + return ["compress_pipeline", "inspect_workflow"] + +def is_relevant_pipeline(path: str) -> bool: + pipelines = get_pipelines() + return any(pipeline == Path(path).stem for pipeline in pipelines) \ No newline at end of file diff --git a/neo4j_queries/edge_queries.py b/neo4j_queries/edge_queries.py index cd38faad739bd5617e73f657cd859d4ee4b6e81e..e84f1efb3926e030efc1f0256b1100a913066fb4 100644 --- a/neo4j_queries/edge_queries.py +++ b/neo4j_queries/edge_queries.py @@ -169,7 +169,7 @@ def create_compress_data_relationship(driver: Driver, from_internal_node_id: int record = result.single() return record["id_1"], record["id_2"] -def create_download_data_relationship(driver: Driver, from_internal_node_id: int, to_internal_node_id: int) -> tuple[int,int]: +def create_inspect_data_relationship(driver: Driver, from_internal_node_id: int, to_internal_node_id: int) -> tuple[int,int]: """ Creates a data dependency relationship in Neo4j between the two nodes with Neo4j internal IDs given as parameters. This relationship is an outgoing data edge from the node with internal ID from_internal_node_id @@ -186,7 +186,7 @@ def create_download_data_relationship(driver: Driver, from_internal_node_id: int query = """ MATCH (a), (b) WHERE elementId(a) = $from_internal_node_id AND elementId(b) = $to_internal_node_id - MERGE (a)-[:DATA_DOWNLOAD]->(b) + MERGE (a)-[:DATA_INSPECT]->(b) RETURN elementId(a) AS id_1, elementId(b) AS id_2 """ with driver.session() as session: @@ -248,7 +248,7 @@ def create_compress_control_relationship(driver: Driver, from_internal_node_id: record = result.single() return record["id_1"], record["id_2"] -def create_download_control_relationship(driver: Driver, from_internal_node_id: int, to_internal_node_id: int, component_id: str) -> tuple[int,int]: +def create_inspect_control_relationship(driver: Driver, from_internal_node_id: int, to_internal_node_id: int, component_id: str) -> tuple[int,int]: """ Creates a control dependency relationship in Neo4j between the two nodes with Neo4j internal IDs given as parameters. This relationship is an outgoing control edge from the node with internal ID from_internal_node_id @@ -265,7 +265,7 @@ def create_download_control_relationship(driver: Driver, from_internal_node_id: query = """ MATCH (a), (b) WHERE elementId(a) = $from_internal_node_id AND elementId(b) = $to_internal_node_id - MERGE (a)-[:CONTROL_DOWNLOAD {component_id: $component_id}]->(b) + MERGE (a)-[:CONTROL_INSPECT {component_id: $component_id}]->(b) RETURN elementId(a) AS id_1, elementId(b) AS id_2 """ with driver.session() as session: @@ -316,9 +316,9 @@ def simplify_data_and_control_edges(driver: Driver): session.run(create_data_edges_query) create_data_edges_query = """ - MATCH (n1)-[:DATA_DOWNLOAD]->(n:Data), (n)-[:DATA_DOWNLOAD]->(n2) + MATCH (n1)-[:DATA_INSPECT]->(n:Data), (n)-[:DATA_INSPECT]->(n2) WITH n, n1, n2, n.component_id AS component_id, n.data_id AS data_id - MERGE (n1)-[:DATA_DOWNLOAD {component_id: component_id, data_id: data_id}]->(n2) + MERGE (n1)-[:DATA_INSPECT {component_id: component_id, data_id: data_id}]->(n2) """ session.run(create_data_edges_query) diff --git a/neo4j_queries/node_queries.py b/neo4j_queries/node_queries.py index 919f7f87fe4793bdefdbc11afe87e2441047846d..20293c1999bd896cf8c5af273349512eaf7e65d1 100644 --- a/neo4j_queries/node_queries.py +++ b/neo4j_queries/node_queries.py @@ -102,7 +102,7 @@ def ensure_main_in_parameter_node(driver: Driver, node_id: str, prefixed_compone """ else: query = """ - MERGE (n:DownloadInParameter {parameter_id: $node_id, component_id: $component_id}) + MERGE (n:InspectInParameter {parameter_id: $node_id, component_id: $component_id}) RETURN elementId(n) AS node_internal_id, n.parameter_id AS id_property, n.component_id AS component_id_property """ with driver.session() as session: @@ -161,7 +161,7 @@ def ensure_main_out_parameter_node(driver: Driver, node_id: str, prefixed_compon """ else: query = """ - MERGE (n:DownloadOutParameter {parameter_id: $node_id, component_id: $component_id}) + MERGE (n:InspectOutParameter {parameter_id: $node_id, component_id: $component_id}) RETURN elementId(n) AS node_internal_id, n.parameter_id AS id_property, n.component_id AS component_id_property """ with driver.session() as session: