from pathlib import Path

import chardet
import ruamel.yaml


def get_cwl_from_repo(repo_path: str) -> list[dict]:
    """
    Parse every CWL (Common Workflow Language) file found under a repository.

    The directory tree rooted at ``repo_path`` is searched recursively for
    entries matching ``*.cwl``; each regular file found is handed to
    ``process_cwl_file``.

    Parameters:
        repo_path (str): The path to the local repository containing CWL files.

    Returns:
        list[dict]: One dictionary per CWL file, in the order the files are
        yielded by ``Path.rglob``. The list is empty when no file matches.

    Raises:
        Any exception raised by ``process_cwl_file`` for an individual file
        propagates to the caller and aborts the scan.
    """
    return [
        process_cwl_file(str(cwl_path))
        for cwl_path in Path(repo_path).rglob("*.cwl")
        # A directory may also be named "<something>.cwl"; only files can be parsed.
        if cwl_path.is_file()
    ]


def process_cwl_file(path: str) -> dict:
    """
    Load a CWL file as YAML, detecting its text encoding first.

    Parameters:
        path (str): The file path to the CWL file.

    Returns:
        dict: The mapping produced by ``ruamel.yaml`` for the file content,
        with an additional 'path' key containing ``path`` (an existing
        top-level 'path' key in the document is overwritten). An empty
        document yields a dictionary holding only the 'path' key.

    Raises:
        TypeError: If the top-level YAML node is not a mapping (for example a
            list or a plain scalar), since the 'path' key cannot be attached.
        UnicodeDecodeError: If the bytes cannot be decoded with the detected
            encoding.
        OSError: If the file cannot be read.

    Notes:
        - Uses `chardet` to guess the file encoding so that non-UTF-8 files
          can be read; UTF-8 is used when `chardet` cannot make a guess.
        - Uses `ruamel.yaml` with its default loader settings for parsing;
          parse errors raised by `ruamel.yaml` propagate unchanged.
    """
    # Read the bytes once; they serve both encoding detection and parsing.
    raw_data = Path(path).read_bytes()

    # chardet reports None (e.g. for an empty file) when it has no guess;
    # fall back to UTF-8 explicitly instead of the platform locale encoding.
    encoding = chardet.detect(raw_data)["encoding"] or "utf-8"

    yaml = ruamel.yaml.YAML()
    yaml_dict = yaml.load(raw_data.decode(encoding))

    if yaml_dict is None:
        # Empty or comment-only document: nothing was parsed.
        yaml_dict = {}
    elif not isinstance(yaml_dict, dict):
        raise TypeError(
            f"Expected a YAML mapping at the top level of {path!r}, "
            f"got {type(yaml_dict).__name__}"
        )

    # Add the file path to the dictionary for reference
    yaml_dict["path"] = path
    return yaml_dict