#! /home/grange/rucio_dev/singupy.sh
"""Second Rucio data-management exercise: generate dummy LOFAR files and upload them.

Add replication rule based on attributes: rucio add-rule YOUREXPERIMENT_YOURINSTITUTION_YOURNAME:FILE_TO_BE_UPLOADED 1 QOS=FAST
Please contact Paul or Aleem for any problem/concern.

Check rule status: rucio list-rules YOUREXPERIMENT_YOURINSTITUTION_YOURNAME:FILE_TO_BE_UPLOADED

Discover your files: rucio list-dids YOUREXPERIMENT_YOURINSTITUTION_YOURNAME:* --filter type="ALL"
--filter type is necessary since by default rucio lists the datasets.

Create datasets, upload them, and add different files to them at a later stage.

Locate your file: rucio list-file-replicas YOUREXPERIMENT_YOURINSTITUTION_YOURNAME:FILE_TO_BE_UPLOADED

Create a simple workflow that accesses the data and performs actions on it (e.g. get checksum and cross-check it).

Delete file.

Delete file from a dataset.
Delete dataset."""
from rucio.client import uploadclient
import os
from string import hexdigits as hxdraw
import random
import logging
import argparse
import subprocess as SP


def create_files(gendir):
    """Populate *gendir* with dummy LOFAR-style tarball files.

    Creates 244 files named ``L<obsID>_SB<subband>_uv.MS_<hash>.tar`` (one per
    subband), each filled with a large random decimal number as placeholder
    content.

    :param gendir: directory to create; raises FileExistsError if it already
        exists (intentional, to avoid clobbering previously generated data).
    """
    os.mkdir(gendir)  # fails if already exists, but that's what I want anyhow.
    hexdigits = hxdraw[:-6]  # drop the trailing uppercase A-F -> "0123456789abcdef"
    obsID = 12345
    for subband in range(244):
        # 6-char lowercase-hex tag to make every generated filename unique.
        somehash = ''.join(random.choice(hexdigits) for _ in range(6))
        filename = f"L{obsID}_SB{subband:03}_uv.MS_{somehash}.tar"
        with open(os.path.join(gendir, filename), "w") as loffile:
            loffile.write(str(random.getrandbits(100000)))


def configure_logger():
    """Return the shared "rucio" logger, logging INFO+ to a fixed file.

    Guarded against repeated calls: a FileHandler is attached only once, so
    calling this more than once (e.g. from an hourly cron run in the same
    process) does not produce duplicated log lines.
    """
    logger = logging.getLogger("rucio")
    logger.setLevel(logging.INFO)
    if not logger.handlers:  # avoid stacking duplicate handlers on re-entry
        handler = logging.FileHandler("/home/grange/ruciologs/rucio.log")
        formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger


def upload_files(gendir):
    """
    Upload multiple files, automatising the upload procedure, and execute hourly till our next meeting.
    Diego and Agustin have already a way to upload or register files. Please, follow up with them in case you face issues.

    :param gendir: directory whose files are uploaded to the configured RSE.
    :raises RuntimeError: if the VOMS proxy cannot be initialised.
    """
    scope = "LOFAR_ASTRON_GRANGE"
    rse = "SARA-DCACHE"
    lifetime = 12*3600  # 12 hours in seconds
    logger = configure_logger()
    uploader = uploadclient.UploadClient(logger=logger)
    # One upload-spec dict per file; register_after_upload makes the replica
    # known to Rucio as soon as the transfer completes.
    uploaddata = [
        {"path": os.path.abspath(os.path.join(gendir, fname)),
         "rse": rse,
         "did_scope": scope,
         "lifetime": lifetime,
         "register_after_upload": True}
        for fname in os.listdir(gendir)
    ]

    voms = "escape:/escape/lofar"
    certkeydir = "/home/grange/.rucio"
    certpath = os.path.join(certkeydir, "client.crt")
    keypath = os.path.join(certkeydir, "client.key")

    # A valid VOMS proxy is required for the upload; fail fast (instead of
    # letting the upload die later with an opaque auth error) if it fails.
    command_name = ["voms-proxy-init", f"--voms={voms}", f"--cert={certpath}", f"--key={keypath}"]
    if SP.call(command_name) != 0:
        raise RuntimeError("voms-proxy-init failed; cannot upload without a valid proxy")
    uploader.upload(uploaddata)
    # For the hourly version, see hourly_push.sh
    # $> crontab -l
    # 28 * 9-13 * * /home/grange/rucio_dev/hourly_push.sh
    # 28 0-11 14 * * /home/grange/rucio_dev/hourly_push.sh


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Run the second data exercise.')
    parser.add_argument("--init", "-i", action="store_true", help="Create dummy data for upload")
    parser.add_argument("--data-dir", "-d", help="Directory with the data to upload, or to create with init.",
                        default="lofar_data")
    parser.add_argument("--upload", "-u", help="Upload the files.", action="store_true")
    args = parser.parse_args()
    if args.init:
        create_files(args.data_dir)
    elif args.upload:
        upload_files(args.data_dir)