Skip to content
Snippets Groups Projects
Commit f3dbea4b authored by Chiara Liotta's avatar Chiara Liotta
Browse files

process control dependencies "when" field of steps

parent 12a2097b
No related branches found
No related tags found
No related merge requests found
Showing
with 13716 additions and 16 deletions
......@@ -91,3 +91,11 @@ For open source projects, say how it is licensed.
## Project status
If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers.
## ANTLR
JS:
1. Download lexer and parser from https://github.com/antlr/grammars-v4/tree/master/javascript/javascript
2. Download `JavaScriptLexerBase.py`, `JavaScriptParserBase.py`, `transformGrammar.py` from https://github.com/antlr/grammars-v4/tree/master/javascript/javascript/Python3
3. Run `python transformGrammar.py`
4. Run `java -jar antlr-4.13.2-complete.jar -o antlr_gen *.g`
This diff is collapsed.
This diff is collapsed.
HashBangLine=1
MultiLineComment=2
SingleLineComment=3
RegularExpressionLiteral=4
OpenBracket=5
CloseBracket=6
OpenParen=7
CloseParen=8
OpenBrace=9
TemplateCloseBrace=10
CloseBrace=11
SemiColon=12
Comma=13
Assign=14
QuestionMark=15
QuestionMarkDot=16
Colon=17
Ellipsis=18
Dot=19
PlusPlus=20
MinusMinus=21
Plus=22
Minus=23
BitNot=24
Not=25
Multiply=26
Divide=27
Modulus=28
Power=29
NullCoalesce=30
Hashtag=31
RightShiftArithmetic=32
LeftShiftArithmetic=33
RightShiftLogical=34
LessThan=35
MoreThan=36
LessThanEquals=37
GreaterThanEquals=38
Equals_=39
NotEquals=40
IdentityEquals=41
IdentityNotEquals=42
BitAnd=43
BitXOr=44
BitOr=45
And=46
Or=47
MultiplyAssign=48
DivideAssign=49
ModulusAssign=50
PlusAssign=51
MinusAssign=52
LeftShiftArithmeticAssign=53
RightShiftArithmeticAssign=54
RightShiftLogicalAssign=55
BitAndAssign=56
BitXorAssign=57
BitOrAssign=58
PowerAssign=59
NullishCoalescingAssign=60
ARROW=61
NullLiteral=62
BooleanLiteral=63
DecimalLiteral=64
HexIntegerLiteral=65
OctalIntegerLiteral=66
OctalIntegerLiteral2=67
BinaryIntegerLiteral=68
BigHexIntegerLiteral=69
BigOctalIntegerLiteral=70
BigBinaryIntegerLiteral=71
BigDecimalIntegerLiteral=72
Break=73
Do=74
Instanceof=75
Typeof=76
Case=77
Else=78
New=79
Var=80
Catch=81
Finally=82
Return=83
Void=84
Continue=85
For=86
Switch=87
While=88
Debugger=89
Function_=90
This=91
With=92
Default=93
If=94
Throw=95
Delete=96
In=97
Try=98
As=99
From=100
Of=101
Yield=102
YieldStar=103
Class=104
Enum=105
Extends=106
Super=107
Const=108
Export=109
Import=110
Async=111
Await=112
Implements=113
StrictLet=114
NonStrictLet=115
Private=116
Public=117
Interface=118
Package=119
Protected=120
Static=121
Identifier=122
StringLiteral=123
BackTick=124
WhiteSpaces=125
LineTerminator=126
HtmlComment=127
CDataComment=128
UnexpectedCharacter=129
TemplateStringStartExpression=130
TemplateStringAtom=131
'['=5
']'=6
'('=7
')'=8
'{'=9
'}'=11
';'=12
','=13
'='=14
'?'=15
'?.'=16
':'=17
'...'=18
'.'=19
'++'=20
'--'=21
'+'=22
'-'=23
'~'=24
'!'=25
'*'=26
'/'=27
'%'=28
'**'=29
'??'=30
'#'=31
'>>'=32
'<<'=33
'>>>'=34
'<'=35
'>'=36
'<='=37
'>='=38
'=='=39
'!='=40
'==='=41
'!=='=42
'&'=43
'^'=44
'|'=45
'&&'=46
'||'=47
'*='=48
'/='=49
'%='=50
'+='=51
'-='=52
'<<='=53
'>>='=54
'>>>='=55
'&='=56
'^='=57
'|='=58
'**='=59
'??='=60
'=>'=61
'null'=62
'break'=73
'do'=74
'instanceof'=75
'typeof'=76
'case'=77
'else'=78
'new'=79
'var'=80
'catch'=81
'finally'=82
'return'=83
'void'=84
'continue'=85
'for'=86
'switch'=87
'while'=88
'debugger'=89
'function'=90
'this'=91
'with'=92
'default'=93
'if'=94
'throw'=95
'delete'=96
'in'=97
'try'=98
'as'=99
'from'=100
'of'=101
'yield'=102
'yield*'=103
'class'=104
'enum'=105
'extends'=106
'super'=107
'const'=108
'export'=109
'import'=110
'async'=111
'await'=112
'implements'=113
'private'=116
'public'=117
'interface'=118
'package'=119
'protected'=120
'static'=121
from antlr4 import *
relativeImport = False
if __name__ is not None and "." in __name__:
relativeImport = True
class JavaScriptLexerBase(Lexer):
    """Base class for the generated JavaScriptLexer.

    Tracks lexical state the JavaScript grammar cannot express directly:
    strict-mode scoping, template-literal nesting depth, and the last
    default-channel token (used to decide whether a '/' may start a
    regex literal rather than a division operator).
    """

    def __init__(self, *args, **kwargs):
        super(JavaScriptLexerBase, self).__init__(*args, **kwargs)
        # Stores values of nested modes. By default mode is strict or
        # defined externally (useStrictDefault).
        self.scopeStrictModes = []
        # Last token emitted on the default channel; None until one is seen.
        self.lastToken: Token = None
        # Default value of strict mode.
        # Can be defined externally by setUseStrictDefault.
        self.useStrictDefault = False
        # Current value of strict mode.
        # Can change during parsing when a 'use strict' directive is seen.
        self.useStrictCurrent = False
        # Preserve nesting depth of template literals.
        self.currentDepth = 0
        self.templateDepthStack = []

    def getStrictDefault(self) -> bool:
        return self.useStrictDefault

    def setUseStrictDefault(self, value: bool):
        self.useStrictDefault = value
        self.useStrictCurrent = value

    def IsStrictMode(self):
        return self.useStrictCurrent

    def IsInTemplateString(self) -> bool:
        return len(self.templateDepthStack) > 0 and self.templateDepthStack[-1] == self.currentDepth

    def IsStartOfFile(self):
        # True until a default-channel token has been produced.
        # (A broken duplicate of this method that compared against the
        # undefined name `null` — a Java leftover — was removed.)
        return self.lastToken is None

    def nextToken(self) -> Token:
        """Return the next token from the character stream and record this last
        token in case it resides on the default channel. This recorded token
        is used to determine when the lexer could possibly match a regex
        literal. Also changes scopeStrictModes stack if tokenize special
        string 'use strict';

        :return the next token from the character stream."""
        next_token: Token = super(JavaScriptLexerBase, self).nextToken()
        if next_token.channel == Token.DEFAULT_CHANNEL:
            self.lastToken = next_token
        return next_token

    def ProcessOpenBrace(self):
        self.currentDepth += 1
        # NOTE(review): when scopeStrictModes is empty this evaluates to
        # False rather than falling back to useStrictDefault; the upstream
        # Java base class uses useStrictDefault here — confirm intended.
        self.useStrictCurrent = bool(self.scopeStrictModes) and (True if self.scopeStrictModes[-1] else self.useStrictDefault)
        self.scopeStrictModes.append(self.useStrictCurrent)

    def ProcessCloseBrace(self):
        # Short-circuit protects the pop() when the stack is empty.
        self.useStrictCurrent = bool(self.scopeStrictModes) and (True if self.scopeStrictModes.pop(-1) else self.useStrictDefault)
        self.currentDepth -= 1

    def ProcessStringLiteral(self):
        # Recognize a 'use strict' directive at the start of the input or
        # right after an opening brace, and push strict mode for the scope.
        if relativeImport:
            from .JavaScriptLexer import JavaScriptLexer
        else:
            from JavaScriptLexer import JavaScriptLexer
        if not self.lastToken or self.lastToken.type == JavaScriptLexer.OpenBrace:
            text = self.text
            if text == '"use strict"' or text == "'use strict'":
                if self.scopeStrictModes:
                    self.scopeStrictModes.pop(-1)
                self.useStrictCurrent = True
                self.scopeStrictModes.append(self.useStrictCurrent)

    def ProcessTemplateOpenBrace(self):
        self.currentDepth += 1
        self.templateDepthStack.append(self.currentDepth)

    def ProcessTemplateCloseBrace(self):
        self.templateDepthStack.pop(-1)
        self.currentDepth -= 1

    def IsRegexPossible(self) -> bool:
        """Returns {@code true} if the lexer can match a regex literal."""
        if relativeImport:
            from .JavaScriptLexer import JavaScriptLexer
        else:
            from JavaScriptLexer import JavaScriptLexer
        if not self.lastToken:
            # No token has been produced yet: at the start of the input,
            # no division is possible, so a regex literal _is_ possible.
            return True
        if self.lastToken.type in [
                JavaScriptLexer.Identifier,
                JavaScriptLexer.NullLiteral,
                JavaScriptLexer.BooleanLiteral,
                JavaScriptLexer.This,
                JavaScriptLexer.CloseBracket,
                JavaScriptLexer.CloseParen,
                JavaScriptLexer.OctalIntegerLiteral,
                JavaScriptLexer.DecimalLiteral,
                JavaScriptLexer.HexIntegerLiteral,
                JavaScriptLexer.StringLiteral,
                JavaScriptLexer.PlusPlus,
                JavaScriptLexer.MinusMinus]:
            # After any of these tokens a '/' is division, not a regex.
            return False
        return True

    def reset(self):
        # Restore all base-class state before delegating to the ANTLR reset.
        self.scopeStrictModes = []
        self.lastToken: Token = None
        self.useStrictDefault = False
        self.useStrictCurrent = False
        self.currentDepth = 0
        self.templateDepthStack = []
        super(JavaScriptLexerBase, self).reset()
This diff is collapsed.
This diff is collapsed.
HashBangLine=1
MultiLineComment=2
SingleLineComment=3
RegularExpressionLiteral=4
OpenBracket=5
CloseBracket=6
OpenParen=7
CloseParen=8
OpenBrace=9
TemplateCloseBrace=10
CloseBrace=11
SemiColon=12
Comma=13
Assign=14
QuestionMark=15
QuestionMarkDot=16
Colon=17
Ellipsis=18
Dot=19
PlusPlus=20
MinusMinus=21
Plus=22
Minus=23
BitNot=24
Not=25
Multiply=26
Divide=27
Modulus=28
Power=29
NullCoalesce=30
Hashtag=31
RightShiftArithmetic=32
LeftShiftArithmetic=33
RightShiftLogical=34
LessThan=35
MoreThan=36
LessThanEquals=37
GreaterThanEquals=38
Equals_=39
NotEquals=40
IdentityEquals=41
IdentityNotEquals=42
BitAnd=43
BitXOr=44
BitOr=45
And=46
Or=47
MultiplyAssign=48
DivideAssign=49
ModulusAssign=50
PlusAssign=51
MinusAssign=52
LeftShiftArithmeticAssign=53
RightShiftArithmeticAssign=54
RightShiftLogicalAssign=55
BitAndAssign=56
BitXorAssign=57
BitOrAssign=58
PowerAssign=59
NullishCoalescingAssign=60
ARROW=61
NullLiteral=62
BooleanLiteral=63
DecimalLiteral=64
HexIntegerLiteral=65
OctalIntegerLiteral=66
OctalIntegerLiteral2=67
BinaryIntegerLiteral=68
BigHexIntegerLiteral=69
BigOctalIntegerLiteral=70
BigBinaryIntegerLiteral=71
BigDecimalIntegerLiteral=72
Break=73
Do=74
Instanceof=75
Typeof=76
Case=77
Else=78
New=79
Var=80
Catch=81
Finally=82
Return=83
Void=84
Continue=85
For=86
Switch=87
While=88
Debugger=89
Function_=90
This=91
With=92
Default=93
If=94
Throw=95
Delete=96
In=97
Try=98
As=99
From=100
Of=101
Yield=102
YieldStar=103
Class=104
Enum=105
Extends=106
Super=107
Const=108
Export=109
Import=110
Async=111
Await=112
Implements=113
StrictLet=114
NonStrictLet=115
Private=116
Public=117
Interface=118
Package=119
Protected=120
Static=121
Identifier=122
StringLiteral=123
BackTick=124
WhiteSpaces=125
LineTerminator=126
HtmlComment=127
CDataComment=128
UnexpectedCharacter=129
TemplateStringStartExpression=130
TemplateStringAtom=131
'['=5
']'=6
'('=7
')'=8
'{'=9
'}'=11
';'=12
','=13
'='=14
'?'=15
'?.'=16
':'=17
'...'=18
'.'=19
'++'=20
'--'=21
'+'=22
'-'=23
'~'=24
'!'=25
'*'=26
'/'=27
'%'=28
'**'=29
'??'=30
'#'=31
'>>'=32
'<<'=33
'>>>'=34
'<'=35
'>'=36
'<='=37
'>='=38
'=='=39
'!='=40
'==='=41
'!=='=42
'&'=43
'^'=44
'|'=45
'&&'=46
'||'=47
'*='=48
'/='=49
'%='=50
'+='=51
'-='=52
'<<='=53
'>>='=54
'>>>='=55
'&='=56
'^='=57
'|='=58
'**='=59
'??='=60
'=>'=61
'null'=62
'break'=73
'do'=74
'instanceof'=75
'typeof'=76
'case'=77
'else'=78
'new'=79
'var'=80
'catch'=81
'finally'=82
'return'=83
'void'=84
'continue'=85
'for'=86
'switch'=87
'while'=88
'debugger'=89
'function'=90
'this'=91
'with'=92
'default'=93
'if'=94
'throw'=95
'delete'=96
'in'=97
'try'=98
'as'=99
'from'=100
'of'=101
'yield'=102
'yield*'=103
'class'=104
'enum'=105
'extends'=106
'super'=107
'const'=108
'export'=109
'import'=110
'async'=111
'await'=112
'implements'=113
'private'=116
'public'=117
'interface'=118
'package'=119
'protected'=120
'static'=121
from antlr4 import *
relativeImport = False
if __name__ is not None and "." in __name__:
relativeImport = True
class JavaScriptParserBase(Parser):
    """Base class for the generated JavaScriptParser.

    Provides the semantic-predicate helpers the grammar uses for token
    lookahead/lookbehind and for automatic-semicolon-insertion (line
    terminator) checks.
    """

    @staticmethod
    def parser():
        # Imported lazily to avoid a circular import with the generated parser.
        if relativeImport:
            from .JavaScriptParser import JavaScriptParser
        else:
            from JavaScriptParser import JavaScriptParser
        return JavaScriptParser

    def p(self, s: str) -> bool:
        """Shorthand for prev()."""
        return self.prev(s)

    def prev(self, s: str) -> bool:
        """Return True if the previous token's text equals `s`."""
        return self._input.LT(-1).text == s

    def n(self, s: str) -> bool:
        """Shorthand for next()."""
        return self.next(s)

    def next(self, s: str) -> bool:
        """Return True if the next token's text equals `s`."""
        return self._input.LT(1).text == s

    def notLineTerminator(self) -> bool:
        # (Removed an unused `JavaScriptParser = self.parser()` lookup.)
        return not self.lineTerminatorAhead()

    def notOpenBraceAndNotFunction(self) -> bool:
        JavaScriptParser = self.parser()
        nextTokenType = self._input.LT(1).type
        return nextTokenType != JavaScriptParser.OpenBrace and nextTokenType != JavaScriptParser.Function_

    def closeBrace(self) -> bool:
        JavaScriptParser = self.parser()
        return self._input.LT(1).type == JavaScriptParser.CloseBrace

    def lineTerminatorAhead(self) -> bool:
        """
        Returns {@code true} iff on the current index of the parser's
        token stream a token exists on the {@code HIDDEN} channel which
        either is a line terminator, or is a multi line comment that
        contains a line terminator.

        :return: {@code true} iff on the current index of the parser's
        token stream a token exists on the {@code HIDDEN} channel which
        either is a line terminator, or is a multi line comment that
        contains a line terminator.
        """
        JavaScriptParser = self.parser()
        # Get the token ahead of the current index.
        # (Annotation fixed: this holds a token *index* (int), not a Token.)
        possibleIndexEosToken: int = self.getCurrentToken().tokenIndex - 1
        if (possibleIndexEosToken < 0):
            return False
        ahead: Token = self._input.get(possibleIndexEosToken)
        if ahead.channel != Lexer.HIDDEN:
            # We're only interested in tokens on the HIDDEN channel.
            return False
        if ahead.type == JavaScriptParser.LineTerminator:
            # There is definitely a line terminator ahead.
            return True
        if ahead.type == JavaScriptParser.WhiteSpaces:
            # Get the token ahead of the current whitespaces.
            possibleIndexEosToken = self.getCurrentToken().tokenIndex - 2
            if (possibleIndexEosToken < 0):
                return False
            ahead = self._input.get(possibleIndexEosToken)
        # Get the token's text and type.
        text = ahead.text
        tokenType = ahead.type
        # Check if the token is, or contains, a line terminator.
        return (tokenType == JavaScriptParser.MultiLineComment and (("\r" in text) or ("\n" in text))) or \
               (tokenType == JavaScriptParser.LineTerminator)
This diff is collapsed.
from antlr4 import ParserRuleContext
from neo4j import Driver
from neo4j_queries.edge_queries import create_has_child_relationship
from neo4j_queries.node_queries import create_ast_node
def traverse_and_create(driver: Driver, tree, parent_node_id=None):
    """Mirror an ANTLR parse (sub)tree into Neo4j as AST nodes.

    One node is created per tree node, labelled with the context class name
    and the node's text (None when empty); each parent is linked to each of
    its children with a "has child" relationship.

    Parameters:
        driver (Driver): the Neo4j driver
        tree: the ANTLR tree node to persist
        parent_node_id: internal ID of the already-created parent node,
            or None when `tree` is the root
    """
    node_text = tree.getText() if tree.getText() else None
    node_id = create_ast_node(driver, type(tree).__name__, node_text)
    # Link to the parent, unless this is the root of the traversal.
    if parent_node_id is not None:
        create_has_child_relationship(driver, parent_node_id, node_id)
    # Depth-first recursion over all children.
    for child_index in range(tree.getChildCount()):
        traverse_and_create(driver, tree.getChild(child_index), node_id)
def traverse_when_statement_extract_dependencies(tree: "ParserRuleContext") -> list[tuple[str, str]]:
    """
    This function traverses a ParserRuleContext tree of a JS expression created by ANTLR
    to extract dependencies specified in a "when" statement of a CWL workflow step.
    Dependencies include references to step input parameters and outputs of other steps,
    which are identified and categorized during the traversal.

    Parameters:
        tree (ParserRuleContext): the tree obtained by parsing a JS expression statement

    Returns:
        list[tuple[str,str]]: a list of references to inputs or outputs. Each reference is a tuple.
        The first element of the tuple is either "parameter" or "step_output"; the second element
        is the ID of the referenced element.
        In the case of a step output, the ID is [workflow-level step ID]/[output ID]
    """
    rule_name = type(tree).__name__
    text = tree.getText() if tree.getText() else None
    ref_list = []
    # The "when" field of a step can reference:
    # - inputs (parameters) of that step in the form inputs.[param ID]
    # - outputs of different steps in the form steps.[step ID].outputs.[output ID]
    # Guard on `text`: a member-dot context with empty text would otherwise
    # crash on None.split(".").
    if rule_name == "MemberDotExpressionContext" and text is not None:
        split_text = text.split(".")
        if len(split_text) == 2:
            if split_text[0] == "inputs":
                ref_list.append(("parameter", split_text[1]))
        elif len(split_text) == 4:
            if split_text[0] == "steps" and split_text[2] == "outputs":
                ref_list.append(("step_output", split_text[1] + "/" + split_text[3]))
    # Recursively collect references from all children.
    for i in range(tree.getChildCount()):
        ref_list.extend(traverse_when_statement_extract_dependencies(tree.getChild(i)))
    return ref_list
from neo4j import Driver
from graph_creation.cst_processing import traverse_when_statement_extract_dependencies
from graph_creation.utils import create_input_nodes_and_relationships, process_source_relationship, resolve_relative_path
from neo4j_queries.node_queries import ensure_component_node, ensure_data_node, ensure_parameter_node
from neo4j_queries.edge_queries import create_data_relationship, create_out_param_relationship
from neo4j_queries.node_queries import ensure_component_node, ensure_data_node, ensure_parameter_node, get_wf_data_nodes_from_step_in_param
from neo4j_queries.edge_queries import create_control_relationship, create_data_relationship, create_out_param_relationship
from pathlib import Path
from parsers.javascript_parsing import parse_javascript_expression_string
# TODO: deal with inputBindings
def process_cwl_inputs(driver: Driver, cwl_entity: dict) -> None:
"""
......@@ -72,14 +75,21 @@ def process_cwl_steps(driver: Driver, cwl_entity: dict) -> None:
A step can be a Workflow, CommandLineTool or ExpressionTool.
For each step, a component node is created with component ID equal to the path of the step.
Then, the lists of inputs and outputs are processed.
For each input, the following nodes and edges are created:
- For each input, the following nodes and edges are created:
- in-parameter node with ID as defined in the component and component ID equal to the path of the step
- a data edge from the step component node to the in-parameter node
- potentially a data node corresponding to the source of the input, with ID equal to the source ID defined in the outer workflow
and component ID equal to the path of the outer workflow
- potentially a data edge from the in-parameter node to the data node of the source
For each output, the following nodes and edges are created:
- If the step has a "when" field, then the JS expression is parsed and its dependencies are extracted.
- The step is control dependent on data node x with component_id equal to the outer workflow id if:
- the when expression mentions a step parameter which is data dependent on x
- the when expression mentions the data_id of x
- A control edge is created from the step component node to the data node x.
- For each output, the following nodes and edges are created:
- out-parameter node with ID as defined in the component and component ID equal to the path of the step
- a data edge from the out-parameter node to the step component node
- a data node representing the outer-workflow-level output, with ID equal to [step id]/[output id as defined in workflow]
......@@ -119,6 +129,25 @@ def process_cwl_steps(driver: Driver, cwl_entity: dict) -> None:
for source_id in input['source']:
process_source_relationship(driver, source_id, cwl_entity['path'], param_node_internal_id)
# Process the "when" field, aka control dependencies
if 'when' in step:
when_expr = step['when']
expr_tree = parse_javascript_expression_string(when_expr)
when_refs = traverse_when_statement_extract_dependencies(expr_tree)
data_nodes = []
for ref in when_refs:
ref_id = ref[1]
if ref[0] == "parameter":
input_data = get_wf_data_nodes_from_step_in_param(driver, ref_id, step_path, cwl_entity['path'])
data_nodes.extend(input_data)
elif ref[0] == "step_output":
step_output = ensure_data_node(driver, ref_id, cwl_entity['path'])[0]
data_nodes.append(step_output)
for data_node in data_nodes:
create_control_relationship(driver, s_node_internal_id, data_node)
# Process the list of outputs of the step
for output in step['out']:
# An output can be defined as a dictionary or simply as a string (ID only)
......
......@@ -22,4 +22,4 @@ def process_repos(repo_list: list[str], driver: Driver) -> None:
process_cwl_inputs(driver, entity)
process_cwl_outputs(driver, entity)
if entity['class'] == 'Workflow':
process_cwl_steps(driver, entity, repo)
process_cwl_steps(driver, entity)
......@@ -84,4 +84,55 @@ def create_data_relationship(driver: Driver, from_internal_node_id: int, to_inte
to_internal_node_id=to_internal_node_id)
record = result.single()
return record["id_1"], record["id_2"]
def create_control_relationship(driver: Driver, from_internal_node_id: int, to_internal_node_id: int) -> tuple[int,int]:
    """
    Creates a control dependency relationship in Neo4j between the two nodes
    whose internal IDs are given: an outgoing CONTROL edge from the node with
    internal ID from_internal_node_id to the node with internal ID
    to_internal_node_id. MERGE keeps the edge unique across repeated calls.

    Parameters:
        driver (Driver): the Neo4j driver
        from_internal_node_id (int): the internal Neo4j ID of the first node
        to_internal_node_id (int): the internal Neo4j ID of the second node

    Returns:
        tuple[int,int]: from_internal_node_id, to_internal_node_id

    NOTE(review): elementId() yields string identifiers in Neo4j 5 although
    the annotations say int — this mirrors the file's existing convention;
    confirm the intended ID type.
    """
    query = """
    MATCH (a), (b)
    WHERE elementId(a) = $from_internal_node_id AND elementId(b) = $to_internal_node_id
    MERGE (a)-[:CONTROL]->(b)
    RETURN elementId(a) AS id_1, elementId(b) AS id_2
    """
    with driver.session() as session:
        record = session.run(
            query,
            from_internal_node_id=from_internal_node_id,
            to_internal_node_id=to_internal_node_id,
        ).single()
    return record["id_1"], record["id_2"]
def create_has_child_relationship(driver: Driver, parent_internal_node_id: int, child_internal_node_id: int) -> tuple[int,int]:
    """
    Creates a "has child" relationship in Neo4j between the two nodes whose
    internal IDs are given: an outgoing HAS_CHILD edge from the parent node
    to the child node. Uses CREATE (not MERGE), so every call adds an edge.

    Parameters:
        driver (Driver): the Neo4j driver
        parent_internal_node_id (int): the internal Neo4j ID of the parent node
        child_internal_node_id (int): the internal Neo4j ID of the child node

    Returns:
        tuple[int,int]: parent_internal_node_id, child_internal_node_id
    """
    query = """
    MATCH (parent), (child)
    WHERE elementId(parent) = $parent_id AND elementId(child) = $child_id
    CREATE (parent)-[:HAS_CHILD]->(child)
    RETURN elementId(parent) AS id_1, elementId(child) AS id_2
    """
    with driver.session() as session:
        record = session.run(
            query,
            parent_id=parent_internal_node_id,
            child_id=child_internal_node_id,
        ).single()
    return record["id_1"], record["id_2"]
\ No newline at end of file
......@@ -66,7 +66,7 @@ def ensure_data_node(driver: Driver, node_id: str, prefixed_component_id: str) -
prefixed_component_id (str): the local relative path of the component
Returns:
tuple[int,str,str, str]: the Neoj4 internal ID of the data node, the data ID, the component ID
tuple[int,str,str]: the Neoj4 internal ID of the data node, the data ID, the component ID
"""
component_id = clean_component_id(prefixed_component_id)
query = """
......@@ -77,3 +77,41 @@ def ensure_data_node(driver: Driver, node_id: str, prefixed_component_id: str) -
result = session.run(query, node_id=node_id, component_id=component_id)
record = result.single()
return record["node_internal_id"], record["id_property"], record["component_id_property"]
def create_ast_node(driver: "Driver", rule: str, text) -> str:
    """
    Creates an AST node in Neo4j for one ANTLR parse-tree node.

    Parameters:
        driver (Driver): the Neo4j driver
        rule (str): the grammar-rule (context class) name of the tree node
        text: the text covered by the tree node, or None if it has none

    Returns:
        str: the internal Neo4j ID (elementId) of the created node
    """
    query = """
    CREATE (n:ASTNode {rule: $rule, text: $text})
    RETURN elementId(n) AS node_id
    """
    with driver.session() as session:
        result = session.run(query, rule=rule, text=text)
        record = result.single()
        return record["node_id"]
def get_wf_data_nodes_from_step_in_param(driver: Driver, param_id: str, prefixed_step_id: str, prefixed_workflow_id: str) -> list[int]:
    """
    Retrieves the internal IDs of data nodes (in a Neo4j database) belonging to
    the workflow with ID workflow_id such that the in-parameter with ID param_id
    of workflow step step_id has a data dependency on these data nodes — i.e.
    the data nodes that the workflow injects into that step parameter.
    Component IDs may be given as local relative paths, so they are cleaned
    before querying Neo4j.

    Parameters:
        driver (Driver): the Neo4j driver
        param_id: the parameter ID of the step parameter
        prefixed_step_id: the unique ID of the step
        prefixed_workflow_id: the unique ID of the workflow the step is part of

    Returns:
        list[int]: the Neo4j internal IDs of the data nodes connected to the
        parameter node of the step in the mentioned workflow
    """
    cleaned_step = clean_component_id(prefixed_step_id)
    cleaned_workflow = clean_component_id(prefixed_workflow_id)
    query = """
    MATCH (n1:Data {component_id: $workflow_id})<-[:DATA]-(n2:Parameter {component_id: $step_id, parameter_type: "in", parameter_id: $param_id})
    RETURN elementId(n1) AS internal_id
    """
    with driver.session() as session:
        records = session.run(
            query,
            workflow_id=cleaned_workflow,
            step_id=cleaned_step,
            param_id=param_id,
        )
        return [rec["internal_id"] for rec in records]
from antlr4 import FileStream, CommonTokenStream, InputStream
from antlr_gen import JavaScriptLexer
from antlr_gen import JavaScriptParser
JSL = JavaScriptLexer.JavaScriptLexer
JSP = JavaScriptParser.JavaScriptParser
def parse_javascript_file(file_path):
    """Parse a JavaScript source file and return the ANTLR parse tree
    rooted at the grammar's `program` rule."""
    char_stream = FileStream(file_path)
    js_lexer = JSL(char_stream)
    js_parser = JSP(CommonTokenStream(js_lexer))
    return js_parser.program()
def parse_javascript_string(js_code):
    """Parse a string of JavaScript code and return the ANTLR parse tree
    rooted at the grammar's `program` rule."""
    char_stream = InputStream(js_code)
    js_lexer = JSL(char_stream)
    js_parser = JSP(CommonTokenStream(js_lexer))
    return js_parser.program()
def parse_javascript_expression_string(js_code):
    """Parse a single JavaScript expression statement (e.g. a CWL "when"
    clause) and return the ANTLR parse tree rooted at `expressionStatement`."""
    char_stream = InputStream(js_code)
    js_lexer = JSL(char_stream)
    js_parser = JSP(CommonTokenStream(js_lexer))
    return js_parser.expressionStatement()
......@@ -3,3 +3,4 @@ python-dotenv
neo4j
ruamel.yaml
cwl-utils
antlr4-python3-runtime
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment