diff --git a/ingest_assignment_2.py b/ingest_assignment_2.py
index b5c1b0c52460b139601384a6ab176c58add9474f..b045f2656749ab208378f3dc0921789096039f4e 100755
--- a/ingest_assignment_2.py
+++ b/ingest_assignment_2.py
@@ -1,16 +1,6 @@
 #! /home/grange/rucio_dev/singupy.sh
 """
-Add replication rule based on attributes: rucio add-rule YOUREXPERIMENT_YOURINSTITUTION_YOURNAME:FILE_TO_BE_UPLOADED 1 QOS=FAST
-Please contact Paul or Aleem for any problem/concern.
-
-Check rule status: rucio list-rules YOUREXPERIMENT_YOURINSTITUTION_YOURNAME:FILE_TO_BE_UPLOADED
-
-Discover you files: rucio list-dids YOUREXPERIMENT_YOURINSTITUTION_YOURNAME:* --filter type="ALL"
---filter type is necessary since by default rucio lists the datasets.
-
-Create datasets, upload them, and add different files to them at a later stage.
-
 Locate your file: rucio list-file-replicas YOUREXPERIMENT_YOURINSTITUTION_YOURNAME:FILE_TO_BE_UPLOADED
 
 Create a simple workflow that accesses the data and performs actions on it (e.g. get checksum and cross-check it).
@@ -19,6 +9,7 @@
 Delete file.
 Delete file from a dataset.
 Delete dataset."""
+from rucio import client
 from rucio.client import uploadclient
 import os
 from string import hexdigits as hxdraw
@@ -27,6 +18,17 @@ import logging
 import argparse
 import subprocess as SP
 
+def voms_proxy_init():
+    voms = "escape:/escape/lofar"
+    certkeydir = "/home/grange/.rucio"
+    certpath = os.path.join(certkeydir, "client.crt")
+    keypath = os.path.join(certkeydir, "client.key")
+
+    command_name = ["voms-proxy-init", f"--voms={voms}", f"--cert={certpath}", f"--key={keypath}"]
+
+    SP.call(command_name)
+
+
 def create_files(gendir):
     os.mkdir(gendir)  # fails if already exists, but that's what I want anyhow.
     hexdigits = hxdraw[:-6]
@@ -47,12 +49,11 @@ def configure_logger():
     logger.addHandler(handler)
     return logger
 
-def upload_files(gendir):
+def upload_files(gendir, scope):
     """
     Upload multiple files, automating the upload procedure, and execute it hourly until our next meeting.
     Diego and Agustin already have a way to upload or register files. Please follow up with them if you face issues.
     """
-    scope = "LOFAR_ASTRON_GRANGE"
     rse = "SARA-DCACHE"
     lifetime = 12*3600  # 12 hours in seconds
     logger = configure_logger()
@@ -66,29 +67,110 @@ def upload_files(gendir):
                       "lifetime" : lifetime,
                       "register_after_upload" : True}
         uploaddata.append(uploaddict)
-    voms = "escape:/escape/lofar"
-    certkeydir = "/home/grange/.rucio"
-    certpath = os.path.join(certkeydir, "client.crt")
-    keypath = os.path.join(certkeydir, "client.key")
+
+    voms_proxy_init()
-    command_name = ["voms-proxy-init",f"--voms={voms}", f"--cert={certpath}", f"--key={keypath}"]
-    SP.call(command_name)
     uploader.upload(uploaddata)
 
 # For the hourly version, see hourly_push.sh
 # $> crontab -l
 # 28 * 9-13 * * /home/grange/rucio_dev/hourly_push.sh
 # 28 0-11 14 * * /home/grange/rucio_dev/hourly_push.sh
 
+def add_replication_rule(scope):
+    """
+    Add replication rule based on attributes: rucio add-rule YOUREXPERIMENT_YOURINSTITUTION_YOURNAME:FILE_TO_BE_UPLOADED 1 QOS=FAST
+    Please contact Paul or Aleem for any problem/concern.
+
+    Check rule status: rucio list-rules YOUREXPERIMENT_YOURINSTITUTION_YOURNAME:FILE_TO_BE_UPLOADED
+    """
+    # Pick a random already-uploaded file to attach the rule to.
+    filename = random.choice(os.listdir("lofar_data"))
+    #rc = ruleclient.RuleClient()
+    rc = client.Client()
+    did = [{'scope': scope, 'name': filename}]
+    ncopies = 1
+    expression = "QOS=FAST"
+    lifetime = 12*3600
+    rc.add_replication_rule(dids=did, copies=ncopies, rse_expression=expression, lifetime=lifetime)
+    for rule in rc.list_associated_rules_for_file(scope=scope, name=filename):
+        print(rule)
+
+def showmyfiles(scope):
+    """
+    Discover your files: rucio list-dids YOUREXPERIMENT_YOURINSTITUTION_YOURNAME:* --filter type="ALL"
+    --filter type is necessary since by default rucio lists only the datasets.
+    """
+    rc = client.Client()
+    for fil in rc.list_dids(scope, {'name': "*"}, type='all'):
+        print(fil)
+
+def create_dataset(scope, datasetname):
+    """
+    Create datasets, upload them, and add different files to them at a later stage.
+    """
+    rc = client.Client()
+    lifetime = 600
+    rc.add_dataset(scope, datasetname, lifetime=lifetime)
+    rse = "SARA-DCACHE"
+    uploaddata = list()
+    logger = configure_logger()
+
+    # First batch: upload files directly into the dataset.
+    for i in range(10):
+        filename = f"datafile_{datasetname}_{i}"
+        with open(filename, 'w') as fhd:
+            fhd.write("Data file for dataset")
+        uploaddict = {"path" : filename,
+                      "rse" : rse,
+                      "did_scope" : scope,
+                      "lifetime" : lifetime,
+                      "dataset_scope": scope,
+                      "dataset_name": datasetname,
+                      "register_after_upload" : True}
+        uploaddata.append(uploaddict)
+
+    voms_proxy_init()
+    uploader = uploadclient.UploadClient(logger=logger)
+    uploader.upload(uploaddata)
+
+    # Second batch: upload without dataset metadata, then attach afterwards.
+    uploaddata = list()
+    datasetregdata = list()
+    for i in range(10):
+        filename = f"datafile_{datasetname}_{i}_addedlater"
+        with open(filename, 'w') as fhd:
+            fhd.write("Data file for dataset")
+        uploaddict = {"path" : filename,
+                      "rse" : rse,
+                      "did_scope" : scope,
+                      "lifetime" : lifetime,
+                      "register_after_upload" : True}
+        dataset_regdict = {'scope' : scope,
+                           'name' : filename}
+        uploaddata.append(uploaddict)
+        datasetregdata.append(dataset_regdict)
+
+    uploader.upload(uploaddata)
+    rc.add_files_to_dataset(scope, datasetname, datasetregdata)
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Run the second data exercise.')
     parser.add_argument("--init", "-i", action="store_true", help="Create dummy data for upload")
     parser.add_argument("--data-dir", "-d", help="Directory with the data to upload, or to create with init.", default="lofar_data")
     parser.add_argument("--upload", "-u", help="Upload the files.", action="store_true")
+    parser.add_argument("--replicate", "-r", help="Add a replication rule.", action="store_true")
+    parser.add_argument("--showfiles", "-s", help="Show all files in the scope.", action="store_true")
+    parser.add_argument("--create-dataset", "-c", help="Create datasets.", action="store_true")
     args = parser.parse_args()
+    scope = "LOFAR_ASTRON_GRANGE"
+    dataset_name = "Dataset_for_assignment_6"
+
     if args.init:
         create_files(args.data_dir)
     elif args.upload:
-        upload_files(args.data_dir)
+        upload_files(args.data_dir, scope)
+    elif args.replicate:
+        add_replication_rule(scope)
+    elif args.showfiles:
+        showmyfiles(scope)
+    elif args.create_dataset:
+        create_dataset(scope, dataset_name)
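A follow-up sketch for the docstring task "Create a simple workflow that accesses the data and performs actions on it (e.g. get checksum and cross-check it)". This is not part of the diff above: crosscheck_checksum and localpath are hypothetical names, and the sketch assumes the file has already been fetched locally (for instance with rucio download) and that the catalogue stores an adler32 checksum, as Rucio does by default.

import zlib

from rucio import client

def crosscheck_checksum(scope, filename, localpath):
    """Compare the adler32 in the Rucio catalogue with a locally computed one."""
    rc = client.Client()
    # list_replicas yields one dict per DID; 'adler32' is the catalogue checksum.
    replica = next(rc.list_replicas([{'scope': scope, 'name': filename}]))
    catalogue_adler32 = replica['adler32']

    # Compute adler32 over the local copy in 1 MiB chunks and render it as
    # 8 lowercase hex digits, matching Rucio's convention.
    checksum = 1
    with open(localpath, 'rb') as fhd:
        for chunk in iter(lambda: fhd.read(1 << 20), b''):
            checksum = zlib.adler32(chunk, checksum)
    local_adler32 = f"{checksum & 0xffffffff:08x}"

    return catalogue_adler32 == local_adler32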
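Similarly, the remaining "Delete file / Delete file from a dataset / Delete dataset" tasks could look like the sketch below. Again a hedged sketch, not part of the diff: the cleanup name is hypothetical, and it leans on the fact that Rucio clients do not hard-delete data; detach_dids removes a file from a dataset, and setting a short lifetime lets the server-side reaper expire a DID, the same lifetime mechanism this script already uses on upload.

from rucio import client

def cleanup(scope, datasetname, filename):
    rc = client.Client()

    # "Delete file from a dataset": detach the file DID from the dataset DID.
    rc.detach_dids(scope, datasetname, [{'scope': scope, 'name': filename}])

    # "Delete file": expire the file DID by giving it a one-second lifetime;
    # the reaper then removes the replicas and catalogue entries.
    rc.set_metadata(scope, filename, 'lifetime', 1)

    # "Delete dataset": expire the dataset DID the same way.
    rc.set_metadata(scope, datasetname, 'lifetime', 1)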