diff --git a/.gitignore b/.gitignore index 4c364784856c6b77c19a3e07da43112fb6871ca6..33f013778abca1a1e9a97aed8bf9dcb3f5c35936 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ .venv/ repos/ +**/__pycache__/ +*.py[cod] Neo4j-25ebc0db-Created-2024-11-17.txt \ No newline at end of file diff --git a/graph_creation/__pycache__/__init__.cpython-312.pyc b/graph_creation/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index dbad6cf95681231d978322d11e618805beb3bb82..0000000000000000000000000000000000000000 Binary files a/graph_creation/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/graph_creation/__pycache__/cwl_parsing.cpython-312.pyc b/graph_creation/__pycache__/cwl_parsing.cpython-312.pyc deleted file mode 100644 index 58d71c29805070e2d59eda693a27cace85366622..0000000000000000000000000000000000000000 Binary files a/graph_creation/__pycache__/cwl_parsing.cpython-312.pyc and /dev/null differ diff --git a/graph_creation/__pycache__/cwl_processing.cpython-312.pyc b/graph_creation/__pycache__/cwl_processing.cpython-312.pyc deleted file mode 100644 index 232a17bed36eb80bed003a243e6982549795994b..0000000000000000000000000000000000000000 Binary files a/graph_creation/__pycache__/cwl_processing.cpython-312.pyc and /dev/null differ diff --git a/graph_creation/__pycache__/repo_processing.cpython-312.pyc b/graph_creation/__pycache__/repo_processing.cpython-312.pyc deleted file mode 100644 index 9c48a27062eeca18a32843f0a54b79e7c414a85e..0000000000000000000000000000000000000000 Binary files a/graph_creation/__pycache__/repo_processing.cpython-312.pyc and /dev/null differ diff --git a/graph_creation/__pycache__/utils.cpython-312.pyc b/graph_creation/__pycache__/utils.cpython-312.pyc deleted file mode 100644 index 20e22c7ac192b92d0f4fd508e6ec00afe23d91f3..0000000000000000000000000000000000000000 Binary files a/graph_creation/__pycache__/utils.cpython-312.pyc and /dev/null differ diff --git a/graph_creation/cwl_processing.py b/graph_creation/cwl_processing.py index 847d7dffdd2987f1841aeb763074d3ee910bfcce..fe231abd33ed38e1bc257d96d899e4f04e7f200f 100644 --- a/graph_creation/cwl_processing.py +++ b/graph_creation/cwl_processing.py @@ -1,9 +1,10 @@ +from neo4j import Driver from graph_creation.utils import create_input_nodes_and_relationships, process_source_relationship from neo4j_queries.node_queries import ensure_component_node, ensure_data_node, ensure_parameter_node from neo4j_queries.edge_queries import create_data_relationship, create_out_param_relationship from pathlib import Path -def process_cwl_inputs(driver, cwl_entity: dict): +def process_cwl_inputs(driver: Driver, cwl_entity: dict) -> None: component_id = cwl_entity['path'] if type(cwl_entity['inputs']) == list: for input in cwl_entity['inputs']: @@ -13,7 +14,7 @@ def process_cwl_inputs(driver, cwl_entity: dict): for key in cwl_entity['inputs'].keys(): create_input_nodes_and_relationships(driver, key, component_id) -def process_cwl_outputs(driver, cwl_entity: dict): +def process_cwl_outputs(driver: Driver, cwl_entity: dict) -> None: component_id = cwl_entity['path'] for output in cwl_entity['outputs']: if type(output) == dict: @@ -29,7 +30,7 @@ def process_cwl_outputs(driver, cwl_entity: dict): for o in output['outputSource']: process_source_relationship(driver, o, component_id, param_node_internal_id) -def process_cwl_steps(driver, cwl_entity: dict, repo: str): +def process_cwl_steps(driver: Driver, cwl_entity: dict, repo: str) -> None: for step in cwl_entity['steps']: combined_path = Path(repo) / step['run'] step_path = str(combined_path) diff --git a/graph_creation/repo_processing.py b/graph_creation/repo_processing.py index c22cdfbe5225d49a0ad65ef92338d6867fd2ba57..e36dd5f4d68432f1f6815fd166f35d5641345def 100644 --- a/graph_creation/repo_processing.py +++ b/graph_creation/repo_processing.py @@ -1,8 +1,9 @@ +from neo4j import Driver from graph_creation.cwl_parsing import get_cwl_from_repo from graph_creation.cwl_processing import process_cwl_inputs, process_cwl_outputs, process_cwl_steps from neo4j_queries.node_queries import ensure_component_node -def process_repos(repo_list: list, driver): +def process_repos(repo_list: list[str], driver: Driver) -> None: cwl_entities = {} for repo in repo_list: cwl_entities[repo]= get_cwl_from_repo(repo) diff --git a/graph_creation/utils.py b/graph_creation/utils.py index bf44831f498e6df51e2d72ebf8e0d5f0569c03cb..de3dabd54246c84c101b53c667b87a06412ae755 100644 --- a/graph_creation/utils.py +++ b/graph_creation/utils.py @@ -1,7 +1,8 @@ +from neo4j import Driver from neo4j_queries.node_queries import ensure_data_node, ensure_parameter_node from neo4j_queries.edge_queries import create_data_relationship, create_in_param_relationship -def create_input_nodes_and_relationships(driver, input_id, component_id): +def create_input_nodes_and_relationships(driver: Driver, input_id: str, component_id: str) -> None: # Create in-parameter node i_node with id = i.id and component_id = c_node.id param_node = ensure_parameter_node(driver, input_id, component_id, 'in') param_node_internal_id = param_node[0] @@ -13,7 +14,7 @@ def create_input_nodes_and_relationships(driver, input_id, component_id): # Create a data edge from i_data_node to i_node create_data_relationship(driver, data_node_internal_id, param_node_internal_id) -def process_source_relationship(driver, source_id, component_id, param_node_internal_id): +def process_source_relationship(driver: Driver, source_id: str, component_id: str, param_node_internal_id: str) -> None: data_node = ensure_data_node(driver, source_id, component_id) data_node_internal_id = data_node[0] create_data_relationship(driver, param_node_internal_id, data_node_internal_id) \ No newline at end of file diff --git a/main.py b/main.py index 1a9e5327707c434f5206c26cdb881097b91ff98f..a961ca2d5323b5d93a7287fde7ceabbc8841a6b4 100644 --- a/main.py +++ b/main.py @@ -5,7 +5,7 @@ import os import gitlab import subprocess -def clone_repos(repo_list: list, folder_name: str): +def clone_repos(repo_list: list[str], folder_name: str): gl = gitlab.Gitlab('https://git.astron.nl') projects = gl.projects.list(iterator=True, get_all=True) for project in projects: @@ -17,7 +17,7 @@ def clone_repos(repo_list: list, folder_name: str): if __name__ == '__main__': relevant_repos = ['ldv/imaging_compress_pipeline'] folder = 'repos' - clone_repos(relevant_repos) + clone_repos(relevant_repos, folder) load_status = dotenv.load_dotenv("Neo4j-25ebc0db-Created-2024-11-17.txt") if load_status is False: diff --git a/neo4j_queries/__pycache__/__init__.cpython-312.pyc b/neo4j_queries/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index a7189352bf21b6440c0bba245ba0830a5ca09c06..0000000000000000000000000000000000000000 Binary files a/neo4j_queries/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/neo4j_queries/__pycache__/edge_queries.cpython-312.pyc b/neo4j_queries/__pycache__/edge_queries.cpython-312.pyc deleted file mode 100644 index 41101cd7aaf942db65346904cc0998e1b13494fa..0000000000000000000000000000000000000000 Binary files a/neo4j_queries/__pycache__/edge_queries.cpython-312.pyc and /dev/null differ diff --git a/neo4j_queries/__pycache__/node_queries.cpython-312.pyc b/neo4j_queries/__pycache__/node_queries.cpython-312.pyc deleted file mode 100644 index 26b0b655e6fc677c6b4e3f5e389f1a50654b52d3..0000000000000000000000000000000000000000 Binary files a/neo4j_queries/__pycache__/node_queries.cpython-312.pyc and /dev/null differ diff --git a/neo4j_queries/__pycache__/utils.cpython-312.pyc b/neo4j_queries/__pycache__/utils.cpython-312.pyc deleted file mode 100644 index 1071760f3536644afec6f0c65efebc415c6af69d..0000000000000000000000000000000000000000 Binary files a/neo4j_queries/__pycache__/utils.cpython-312.pyc and /dev/null differ diff --git a/neo4j_queries/edge_queries.py b/neo4j_queries/edge_queries.py index f0ee233c7d510eb9d79c34f45e477ead6b06254f..ff48f4cb87200dcfb854349f4da91e84ab3be1e4 100644 --- a/neo4j_queries/edge_queries.py +++ b/neo4j_queries/edge_queries.py @@ -1,6 +1,7 @@ +from neo4j import Driver from neo4j_queries.utils import clean_component_id -def create_in_param_relationship(driver, prefixed_component_id, parameter_internal_id): +def create_in_param_relationship(driver: Driver, prefixed_component_id: str, parameter_internal_id: int) -> tuple[str,str]: component_id = clean_component_id(prefixed_component_id) query = """ MATCH (c:Component {component_id: $component_id}), (p) @@ -14,7 +15,7 @@ def create_in_param_relationship(driver, prefixed_component_id, parameter_intern record = result.single() return record["component_id"], record["parameter_id"] -def create_out_param_relationship(driver, prefixed_component_id, parameter_internal_id): +def create_out_param_relationship(driver: Driver, prefixed_component_id: str, parameter_internal_id: int) -> tuple[str,str]: component_id = clean_component_id(prefixed_component_id) query = """ MATCH (c:Component {component_id: $component_id}), (p) @@ -28,7 +29,7 @@ def create_out_param_relationship(driver, prefixed_component_id, parameter_inter record = result.single() return record["component_id"], record["parameter_id"] -def create_data_relationship(driver, from_internal_node_id, to_internal_node_id): +def create_data_relationship(driver: Driver, from_internal_node_id: int, to_internal_node_id: int) -> tuple[int,int]: query = """ MATCH (a), (b) WHERE id(a) = $from_internal_node_id AND id(b) = $to_internal_node_id diff --git a/neo4j_queries/node_queries.py b/neo4j_queries/node_queries.py index b78b58bbfec744769c1130ae97da9ac873146192..7bdd951355ca4295844823209c65ba2ebd6a26a9 100644 --- a/neo4j_queries/node_queries.py +++ b/neo4j_queries/node_queries.py @@ -1,7 +1,8 @@ +from neo4j import Driver from neo4j_queries.utils import clean_component_id -def ensure_component_node(driver, prefixed_component_id): +def ensure_component_node(driver: Driver, prefixed_component_id: str) -> tuple[int,str]: component_id = clean_component_id(prefixed_component_id) query = """ MERGE (c:Component {component_id: $component_id}) @@ -12,7 +13,8 @@ def ensure_component_node(driver, prefixed_component_id): record = result.single() return record["node_internal_id"], record["id_property"] -def ensure_parameter_node(driver, node_id, prefixed_component_id, param_type): +def ensure_parameter_node(driver: Driver, node_id: str, prefixed_component_id: str, param_type: str) \ + -> tuple[int,str,str,str]: component_id = clean_component_id(prefixed_component_id) query = """ MERGE (n:Parameter {parameter_id: $node_id, component_id: $component_id}) @@ -27,7 +29,7 @@ def ensure_parameter_node(driver, node_id, prefixed_component_id, param_type): record = result.single() return record["node_internal_id"], record["id_property"], record["component_id_property"], record['parameter_type_property'] -def ensure_data_node(driver, node_id, prefixed_component_id): +def ensure_data_node(driver: Driver, node_id: str, prefixed_component_id: str) -> tuple[int,str,str]: component_id = clean_component_id(prefixed_component_id) query = """ MERGE (n:Data {data_id: $node_id, component_id: $component_id})