From 12a2097b410139eae70dbd031a916b9d3a4487ff Mon Sep 17 00:00:00 2001
From: Chiara Liotta <liotta@astron.nl>
Date: Fri, 29 Nov 2024 17:45:36 +0100
Subject: [PATCH] fix docs and resolve step path

---
 graph_creation/cwl_parsing.py     |  4 +--
 graph_creation/cwl_processing.py  | 24 +++++++++--------
 graph_creation/repo_processing.py |  4 +--
 graph_creation/utils.py           | 44 +++++++++++++++++++++++++------
 main.py                           |  4 +--
 neo4j_queries/edge_queries.py     | 35 ++++++++++++------------
 neo4j_queries/node_queries.py     | 32 +++++++++++-----------
 7 files changed, 89 insertions(+), 58 deletions(-)

diff --git a/graph_creation/cwl_parsing.py b/graph_creation/cwl_parsing.py
index 53dc061..36baae1 100644
--- a/graph_creation/cwl_parsing.py
+++ b/graph_creation/cwl_parsing.py
@@ -9,10 +9,10 @@ def get_cwl_from_repo(repo_path: str) -> list[dict]:
     The path is saved using the key 'path' with value equal to the relative path of the CWL file.
 
     Parameters:
-    repo_path (str): the path of the local repository
+        repo_path (str): the path of the local repository
 
     Returns:
-    list[dict]: a list of dictonaries, each dictionary is a parsed CWL file
+        list[dict]: a list of dictionaries, each dictionary is a parsed CWL file
     """
     cwl_entities = []
     pathlist = Path(repo_path).glob('**/*.cwl')
diff --git a/graph_creation/cwl_processing.py b/graph_creation/cwl_processing.py
index 2446396..57ab18f 100644
--- a/graph_creation/cwl_processing.py
+++ b/graph_creation/cwl_processing.py
@@ -1,5 +1,5 @@
 from neo4j import Driver
-from graph_creation.utils import create_input_nodes_and_relationships, process_source_relationship
+from graph_creation.utils import create_input_nodes_and_relationships, process_source_relationship, resolve_relative_path
 from neo4j_queries.node_queries import ensure_component_node, ensure_data_node, ensure_parameter_node
 from neo4j_queries.edge_queries import create_data_relationship, create_out_param_relationship
 from pathlib import Path
@@ -15,8 +15,8 @@ def process_cwl_inputs(driver: Driver, cwl_entity: dict) -> None:
     - a data edge from the data node to the the in-parameter node
 
     Parameters:
-    driver (Driver): the driver used to connect to Neo4j
-    cwl_entity (dict): the dictionary containing the parsed contents of the CWL component
+        driver (Driver): the driver used to connect to Neo4j
+        cwl_entity (dict): the dictionary containing the parsed contents of the CWL component
     """
     component_id = cwl_entity['path']
     # Inputs can be defined a list or a dictionary
@@ -43,8 +43,8 @@ def process_cwl_outputs(driver: Driver, cwl_entity: dict) -> None:
     - a data edge from the out-parameter node to the data node
 
     Parameters:
-    driver (Driver): the driver used to connect to Neo4j
-    cwl_entity (dict): the dictionary containing the parsed contents of the CWL component
+        driver (Driver): the driver used to connect to Neo4j
+        cwl_entity (dict): the dictionary containing the parsed contents of the CWL component
     """
     component_id = cwl_entity['path']
     for output in cwl_entity['outputs']:
@@ -66,7 +66,7 @@ def process_cwl_outputs(driver: Driver, cwl_entity: dict) -> None:
                     for source_id in output['outputSource']:
                         process_source_relationship(driver, source_id, component_id, param_node_internal_id)
    
-def process_cwl_steps(driver: Driver, cwl_entity: dict, repo_path: str) -> None:
+def process_cwl_steps(driver: Driver, cwl_entity: dict) -> None:    
     """
     Processes the steps of a CWL Workflow component( which we will refer to as outer workflow component). 
     A step can be a Workflow, CommandLineTool or ExpressionTool. 
@@ -87,14 +87,16 @@ def process_cwl_steps(driver: Driver, cwl_entity: dict, repo_path: str) -> None:
     - a data edge from the out-parameter node to the data node
 
     Parameters:
-    driver (Driver): the driver used to connect to Neo4j
-    cwl_entity (dict): the dictionary containing the parsed contents of the CWL component
-    repo_path (str): the path of the repository that contains the CWL component
+        driver (Driver): the driver used to connect to Neo4j
+        cwl_entity (dict): the dictionary containing the parsed contents of the CWL component
     """
     for step in cwl_entity['steps']:
+
         # Retrieve path of the step
-        combined_path = Path(repo_path) / step['run']
-        step_path = str(combined_path)
+        workflow_folder = Path(cwl_entity['path']).parent
+        full_step_path = workflow_folder / Path(step['run'])
+        step_path = str(resolve_relative_path(full_step_path))
+
         # Create the step component node with ID equal to the step 
         s_node = ensure_component_node(driver, step_path)
         s_node_internal_id = s_node[0]
diff --git a/graph_creation/repo_processing.py b/graph_creation/repo_processing.py
index 9e53fbb..34850ab 100644
--- a/graph_creation/repo_processing.py
+++ b/graph_creation/repo_processing.py
@@ -9,8 +9,8 @@ def process_repos(repo_list: list[str], driver: Driver) -> None:
     the function parses the CWL files and turns them into a Neo4j dependency graph.
 
     Parameters:
-    repo_list (list[str]): a list of paths to local repositories
-    driver (Driver): a Neo4j driver
+        repo_list (list[str]): a list of paths to local repositories
+        driver (Driver): a Neo4j driver
     """
     cwl_entities = {}
     for repo in repo_list:
diff --git a/graph_creation/utils.py b/graph_creation/utils.py
index 86e0018..0b62a0e 100644
--- a/graph_creation/utils.py
+++ b/graph_creation/utils.py
@@ -1,3 +1,4 @@
+from pathlib import Path
 from neo4j import Driver
 from neo4j_queries.node_queries import ensure_data_node, ensure_parameter_node
 from neo4j_queries.edge_queries import create_data_relationship, create_in_param_relationship
@@ -12,9 +13,9 @@ def create_input_nodes_and_relationships(driver: Driver, input_id: str, componen
     - a data edge from the data node to the the in-parameter node
 
     Parameters:
-    driver (Driver): the driver used to connect to Neo4j
-    input_id (str): the ID of the input as defined in the CWL component
-    component_id (str): the unique ID of the CWL component (its path)
+        driver (Driver): the driver used to connect to Neo4j
+        input_id (str): the ID of the input as defined in the CWL component
+        component_id (str): the unique ID of the CWL component (its path)
     """
     # Create in-parameter with the parameter ID as defined in the component and component ID equal to the path of the componet
     param_node = ensure_parameter_node(driver, input_id, component_id, 'in')
@@ -36,11 +37,38 @@ def process_source_relationship(driver: Driver, source_id: str, component_id: st
     - a data edge from the parameter node to the data node
 
     Parameters:
-    driver (Driver): the driver used to connect to Neo4j
-    source_id (str): the ID of the data that functions as a source for the parameter
-    component_id (str): the unique ID of the CWL component (its path)
-    param_node_internal_id (int): the unique ID of the parameter node as defined internally by Neo4j
+        driver (Driver): the driver used to connect to Neo4j
+        source_id (str): the ID of the data that functions as a source for the parameter
+        component_id (str): the unique ID of the CWL component (its path)
+        param_node_internal_id (int): the unique ID of the parameter node as defined internally by Neo4j
     """
     data_node = ensure_data_node(driver, source_id, component_id)
     data_node_internal_id = data_node[0]
-    create_data_relationship(driver, param_node_internal_id, data_node_internal_id)
\ No newline at end of file
+    create_data_relationship(driver, param_node_internal_id, data_node_internal_id)
+
+def resolve_relative_path(path: Path) -> Path:
+    """
+    Resolves a path lexically by simplifying `.` (current directory)
+    and `..` (parent directory) components without touching the filesystem.
+    A `..` that would climb above the start of a relative path is kept,
+    and a `..` directly under the root of an absolute path is dropped.
+
+    Parameters:
+        path (Path): the input Path object to be resolved
+
+    Returns:
+        Path: a new object representing the simplified path
+
+    Example:
+        resolve_relative_path(Path("x/y/../z"))  ->  Path("x/z")
+        resolve_relative_path(Path("../a/./b"))  ->  Path("../a/b")
+    """
+    parts = []
+    for part in path.parts:
+        if part == "." or (part == ".." and parts == [path.anchor]):
+            continue  # `.` is a no-op; the root directory has no parent
+        if part == ".." and parts and parts[-1] != "..":
+            parts.pop()
+        else:
+            parts.append(part)
+    return Path(*parts)
\ No newline at end of file
diff --git a/main.py b/main.py
index 81e6ccf..55a7807 100644
--- a/main.py
+++ b/main.py
@@ -11,8 +11,8 @@ def clone_repos(repo_list: list[str], folder_name: str) -> None:
     the mentioned repositories are cloned into the mentioned folder.
 
     Parameters:
-    repo_list (list[str]): list of relative paths to ASTRON GitLab repositories
-    folder_name (str): the name of the folder to clone the repos into
+        repo_list (list[str]): list of relative paths to ASTRON GitLab repositories
+        folder_name (str): the name of the folder to clone the repos into
     """
     gl = gitlab.Gitlab('https://git.astron.nl')
     projects = gl.projects.list(iterator=True, get_all=True)
diff --git a/neo4j_queries/edge_queries.py b/neo4j_queries/edge_queries.py
index c397009..a8bd1d2 100644
--- a/neo4j_queries/edge_queries.py
+++ b/neo4j_queries/edge_queries.py
@@ -10,19 +10,19 @@ def create_in_param_relationship(driver: Driver, prefixed_component_id: str, par
     before querying Neo4j.
 
     Parameters:
-    driver (Driver): the Neo4j driver
-    prefixed_component_id (str): the local relative path of the component
-    parameter_internal_id (int): the internal Neo4j ID of the in-parameter node
+        driver (Driver): the Neo4j driver
+        prefixed_component_id (str): the local relative path of the component
+        parameter_internal_id (int): the internal Neo4j ID of the in-parameter node
 
     Returns:
-    tuple[str,str]: the component ID of the component, the parameter ID of the parameter
+        tuple[str,str]: the component ID of the component, the parameter ID of the parameter
     """
     component_id = clean_component_id(prefixed_component_id)
     query = """
     MATCH (c:Component {component_id: $component_id}), (p)
-    WHERE id(p) = $parameter_internal_id
+    WHERE elementId(p) = $parameter_internal_id
     MERGE (c)-[:DATA]->(p)
-    RETURN c.id AS component_id, p.parameter_id AS parameter_id
+    RETURN c.component_id AS component_id, p.parameter_id AS parameter_id
     """
     with driver.session() as session:
         result = session.run(query, component_id=component_id, 
@@ -39,17 +39,17 @@ def create_out_param_relationship(driver: Driver, prefixed_component_id: str, pa
     before querying Neo4j.
 
     Parameters:
-    driver (Driver): the Neo4j driver
-    prefixed_component_id (str): the local relative path of the component
-    parameter_internal_id (int): the internal Neo4j ID of the out-parameter node
+        driver (Driver): the Neo4j driver
+        prefixed_component_id (str): the local relative path of the component
+        parameter_internal_id (int): the internal Neo4j ID of the out-parameter node
 
     Returns:
-    tuple[str,str]: the component ID of the component, the parameter ID of the parameter
+        tuple[str,str]: the component ID of the component, the parameter ID of the parameter
     """
     component_id = clean_component_id(prefixed_component_id)
     query = """
     MATCH (c:Component {component_id: $component_id}), (p)
-    WHERE id(p) = $parameter_internal_id
+    WHERE elementId(p) = $parameter_internal_id
     MERGE (c)<-[:DATA]-(p)
     RETURN c.component_id AS component_id, p.parameter_id AS parameter_id
     """
@@ -66,21 +66,22 @@ def create_data_relationship(driver: Driver, from_internal_node_id: int, to_inte
     to the node with internal ID to_internal_node_id.
 
     Parameters:
-    driver (Driver): the Neo4j driver
-    from_internal_node_id (int): the internal Neo4j ID of the first node
-    to_internal_node_id (int): the internal Neo4j ID of the second node
+        driver (Driver): the Neo4j driver
+        from_internal_node_id (int): the internal Neo4j ID of the first node
+        to_internal_node_id (int): the internal Neo4j ID of the second node
 
     Returns:
-    tuple[int,int]: from_internal_node_id, to_internal_node_id
+        tuple[int,int]: from_internal_node_id, to_internal_node_id
     """
     query = """
     MATCH (a), (b)
-    WHERE id(a) = $from_internal_node_id AND id(b) = $to_internal_node_id
+    WHERE elementId(a) = $from_internal_node_id AND elementId(b) = $to_internal_node_id
     MERGE (a)-[:DATA]->(b)
-    RETURN a.id AS id_1, b.id AS id_2
+    RETURN elementId(a) AS id_1, elementId(b) AS id_2
     """
     with driver.session() as session:
         result = session.run(query, from_internal_node_id=from_internal_node_id,
                              to_internal_node_id=to_internal_node_id)
         record = result.single()
+        # NOTE(review): elementId() yields strings; the `int` type hints are stale.
         return record["id_1"], record["id_2"]
\ No newline at end of file
diff --git a/neo4j_queries/node_queries.py b/neo4j_queries/node_queries.py
index b2dbb9f..21d1f76 100644
--- a/neo4j_queries/node_queries.py
+++ b/neo4j_queries/node_queries.py
@@ -9,16 +9,16 @@ def ensure_component_node(driver: Driver, prefixed_component_id: str) -> tuple[i
     before querying Neo4j.
 
     Parameters:
-    driver (Driver): the Neo4j driver
-    prefixed_component_id (str): the local relative path of the component
+        driver (Driver): the Neo4j driver
+        prefixed_component_id (str): the local relative path of the component
 
     Returns:
-    tuple[int,str]: the Neoj4 internal ID of the component node, the component ID of the component
+        tuple[int,str]: the Neo4j internal ID of the component node, the component ID of the component
     """
     component_id = clean_component_id(prefixed_component_id)
     query = """
     MERGE (c:Component {component_id: $component_id})
-    RETURN id(c) AS node_internal_id, c.component_id AS component_id
+    RETURN elementId(c) AS node_internal_id, c.component_id AS component_id
     """
     with driver.session() as session:
         result = session.run(query, component_id=component_id)
@@ -34,18 +34,18 @@ def ensure_parameter_node(driver: Driver, node_id: str, prefixed_component_id: s
     before querying Neo4j.
 
     Parameters:
-    driver (Driver): the Neo4j driver
-    node_id (str): the ID of the parameter
-    prefixed_component_id (str): the local relative path of the component
-    param_type (str): the type of the parameter ('in' or 'out')
+        driver (Driver): the Neo4j driver
+        node_id (str): the ID of the parameter
+        prefixed_component_id (str): the local relative path of the component
+        param_type (str): the type of the parameter ('in' or 'out')
 
     Returns:
-    tuple[int,str,str, str]: the Neoj4 internal ID of the parameter node, the parameter ID, the component ID, the parameter type
+        tuple[int,str,str, str]: the Neo4j internal ID of the parameter node, the parameter ID, the component ID, the parameter type
     """
     component_id = clean_component_id(prefixed_component_id)
     query = """
     MERGE (n:Parameter {parameter_id: $node_id, component_id: $component_id, parameter_type: $param_type})
-    RETURN id(n) AS node_internal_id, n.parameter_id AS id_property, n.component_id AS component_id_property,
+    RETURN elementId(n) AS node_internal_id, n.parameter_id AS id_property, n.component_id AS component_id_property,
         n.parameter_type AS parameter_type_property
     """
     with driver.session() as session:
@@ -61,19 +61,19 @@ def ensure_data_node(driver: Driver, node_id: str, prefixed_component_id: str) -
     before querying Neo4j.
 
     Parameters:
-    driver (Driver): the Neo4j driver
-    node_id (str): the ID of the data 
-    prefixed_component_id (str): the local relative path of the component
+        driver (Driver): the Neo4j driver
+        node_id (str): the ID of the data 
+        prefixed_component_id (str): the local relative path of the component
 
     Returns:
-    tuple[int,str,str, str]: the Neoj4 internal ID of the data node, the data ID, the component ID
+        tuple[int,str,str]: the Neo4j internal ID of the data node, the data ID, the component ID
     """
     component_id = clean_component_id(prefixed_component_id)
     query = """
     MERGE (n:Data {data_id: $node_id, component_id: $component_id})
-    RETURN id(n) AS node_internal_id, n.data_id AS id_property, n.component_id AS component_id_property
+    RETURN elementId(n) AS node_internal_id, n.data_id AS id_property, n.component_id AS component_id_property
     """
     with driver.session() as session:
         result = session.run(query, node_id=node_id, component_id=component_id)
         record = result.single()
-        return record["node_internal_id"], record["id_property"], record["component_id_property"]
\ No newline at end of file
+        return record["node_internal_id"], record["id_property"], record["component_id_property"]
-- 
GitLab