Skip to content
Snippets Groups Projects
Commit 0d8629d3 authored by Yan Grange's avatar Yan Grange :wave:
Browse files

Added the dataset handling

parent 2ec480a5
No related branches found
No related tags found
No related merge requests found
#! /home/grange/rucio_dev/singupy.sh
"""
Add replication rule based on attributes: rucio add-rule YOUREXPERIMENT_YOURINSTITUTION_YOURNAME:FILE_TO_BE_UPLOADED 1 QOS=FAST
Please contact Paul or Aleem for any problem/concern.
Check rule status: rucio list-rules YOUREXPERIMENT_YOURINSTITUTION_YOURNAME:FILE_TO_BE_UPLOADED
Discover your files: rucio list-dids YOUREXPERIMENT_YOURINSTITUTION_YOURNAME:* --filter type="ALL"
--filter type is necessary since by default rucio lists the datasets.
Create datasets, upload them, and add different files to them at a later stage.
Locate your file: rucio list-file-replicas YOUREXPERIMENT_YOURINSTITUTION_YOURNAME:FILE_TO_BE_UPLOADED
Create a simple workflow that accesses the data and performs actions on it (e.g. get checksum and cross-check it).
Delete file.
Delete file from a dataset.
Delete dataset."""
from rucio import client
from rucio.client import uploadclient from rucio.client import uploadclient
import os import os
from string import hexdigits as hxdraw from string import hexdigits as hxdraw
...@@ -27,6 +18,17 @@ import logging ...@@ -27,6 +18,17 @@ import logging
import argparse import argparse
import subprocess as SP import subprocess as SP
def voms_proxy_init():
    """Create a VOMS proxy for the ESCAPE/LOFAR VO from the local client certificate.

    Runs ``voms-proxy-init`` as a subprocess with the certificate/key pair
    from the user's rucio config directory.

    Raises
    ------
    subprocess.CalledProcessError
        If ``voms-proxy-init`` exits non-zero.  Failing fast here beats the
        original silent ``SP.call``, where a broken proxy only surfaced
        later as an obscure upload/authentication error.
    """
    voms = "escape:/escape/lofar"
    # NOTE(review): hard-coded user path — consider deriving from $HOME or an env var.
    certkeydir = "/home/grange/.rucio"
    certpath = os.path.join(certkeydir, "client.crt")
    keypath = os.path.join(certkeydir, "client.key")
    command_name = ["voms-proxy-init", f"--voms={voms}",
                    f"--cert={certpath}", f"--key={keypath}"]
    # check_call instead of call: raise on failure rather than ignoring the exit code.
    SP.check_call(command_name)
def create_files(gendir): def create_files(gendir):
os.mkdir(gendir) # fails if already exists, but that's what I want anyhow. os.mkdir(gendir) # fails if already exists, but that's what I want anyhow.
hexdigits = hxdraw[:-6] hexdigits = hxdraw[:-6]
...@@ -47,12 +49,11 @@ def configure_logger(): ...@@ -47,12 +49,11 @@ def configure_logger():
logger.addHandler(handler) logger.addHandler(handler)
return logger return logger
def upload_files(gendir): def upload_files(gendir, scope):
""" """
Upload multiple files, automatising the upload procedure, and execute hourly till our next meeting. Upload multiple files, automatising the upload procedure, and execute hourly till our next meeting.
Diego and Agustin have already a way to upload or register files. Please, follow up with them in case you face issues. Diego and Agustin have already a way to upload or register files. Please, follow up with them in case you face issues.
""" """
scope = "LOFAR_ASTRON_GRANGE"
rse = "SARA-DCACHE" rse = "SARA-DCACHE"
lifetime = 12*3600 # 12 hours in seconds lifetime = 12*3600 # 12 hours in seconds
logger = configure_logger() logger = configure_logger()
...@@ -66,19 +67,88 @@ def upload_files(gendir): ...@@ -66,19 +67,88 @@ def upload_files(gendir):
"lifetime" : lifetime, "lifetime" : lifetime,
"register_after_upload" : True} "register_after_upload" : True}
uploaddata.append(uploaddict) uploaddata.append(uploaddict)
voms = "escape:/escape/lofar"
certkeydir = "/home/grange/.rucio"
certpath = os.path.join(certkeydir, "client.crt")
keypath = os.path.join(certkeydir, "client.key")
command_name = ["voms-proxy-init",f"--voms={voms}", f"--cert={certpath}", f"--key={keypath}"] voms_proxy_init()
SP.call(command_name)
uploader.upload(uploaddata) uploader.upload(uploaddata)
# For the hourly version, see hourly_push.sh # For the hourly version, see hourly_push.sh
# $> crontab -l # $> crontab -l
# 28 * 9-13 * * /home/grange/rucio_dev/hourly_push.sh # 28 * 9-13 * * /home/grange/rucio_dev/hourly_push.sh
# 28 0-11 14 * * /home/grange/rucio_dev/hourly_push.sh # 28 0-11 14 * * /home/grange/rucio_dev/hourly_push.sh
def add_replication_rule(scope, datadir="lofar_data"):
    """Add a QOS=FAST replication rule for one randomly chosen uploaded file.

    CLI equivalents:
        rucio add-rule SCOPE:FILE 1 QOS=FAST
        rucio list-rules SCOPE:FILE

    Parameters
    ----------
    scope : str
        Rucio scope the files were uploaded under.
    datadir : str
        Local directory whose file names mirror the uploaded DIDs; one name
        is picked at random.  Default matches the script's ``--data-dir``
        default, so existing callers are unaffected.
    """
    filename = random.choice(os.listdir(datadir))
    rc = client.Client()
    did = [{'scope': scope, 'name': filename}]
    ncopies = 1
    expression = "QOS=FAST"
    lifetime = 12 * 3600  # rule lifetime in seconds (12 hours)
    rc.add_replication_rule(dids=did, copies=ncopies,
                            rse_expression=expression, lifetime=lifetime)
    # Print the rules now attached to the chosen file as confirmation.
    for rule in rc.list_associated_rules_for_file(scope=scope, name=filename):
        print(rule)
def showmyfiles(scope):
    """Print every DID registered under *scope*, files included.

    CLI equivalent: ``rucio list-dids SCOPE:* --filter type="ALL"``
    (the explicit type filter is needed because rucio lists only
    datasets by default).
    """
    rucio_client = client.Client()
    matches = rucio_client.list_dids(scope, {'name': "*"}, type='all')
    for did_name in matches:
        print(did_name)
def create_dataset(scope, datasetname):
    """Create a dataset and populate it two different ways.

    Round 1 uploads ten files directly into the dataset (by putting
    ``dataset_scope``/``dataset_name`` in the upload spec).  Round 2
    uploads ten more free-standing files and then attaches them with
    ``add_files_to_dataset`` — demonstrating that files can be added to a
    dataset at a later stage.

    Parameters
    ----------
    scope : str
        Rucio scope for the dataset and all files.
    datasetname : str
        Name of the dataset to create; also embedded in each file name.
    """
    rc = client.Client()
    lifetime = 600  # seconds; applied to the dataset and to every file
    rc.add_dataset(scope, datasetname, lifetime=lifetime)
    rse = "SARA-DCACHE"
    logger = configure_logger()
    voms_proxy_init()
    uploader = uploadclient.UploadClient(logger=logger)

    # Round 1: upload straight into the dataset.
    batch = []
    for i in range(10):
        filename = _write_dummy_file(f"datafile_{datasetname}_{i}")
        spec = _upload_spec(filename, rse, scope, lifetime)
        spec["dataset_scope"] = scope
        spec["dataset_name"] = datasetname
        batch.append(spec)
    uploader.upload(batch)

    # Round 2: upload stand-alone files, then attach them afterwards.
    batch = []
    attach = []
    for i in range(10):
        filename = _write_dummy_file(f"datafile_{datasetname}_{i}_addedlater")
        batch.append(_upload_spec(filename, rse, scope, lifetime))
        attach.append({'scope': scope, 'name': filename})
    uploader.upload(batch)
    rc.add_files_to_dataset(scope, datasetname, attach)


def _write_dummy_file(filename):
    """Write a small placeholder data file and return its name."""
    with open(filename, 'w') as fhd:
        fhd.write("Data file for dataset")
    return filename


def _upload_spec(path, rse, scope, lifetime):
    """Build one upload dict in the form rucio's UploadClient expects."""
    return {"path": path,
            "rse": rse,
            "did_scope": scope,
            "lifetime": lifetime,
            "register_after_upload": True}
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Run the second data exercise.') parser = argparse.ArgumentParser(description='Run the second data exercise.')
...@@ -86,9 +156,21 @@ if __name__ == "__main__": ...@@ -86,9 +156,21 @@ if __name__ == "__main__":
parser.add_argument("--data-dir", "-d", help="Directory with the data to upload, or to create with init.", parser.add_argument("--data-dir", "-d", help="Directory with the data to upload, or to create with init.",
default="lofar_data") default="lofar_data")
parser.add_argument("--upload", "-u", help="Upload the files.", action="store_true") parser.add_argument("--upload", "-u", help="Upload the files.", action="store_true")
parser.add_argument("--replicate", "-r", help="Add replication rule.", action="store_true")
parser.add_argument("--showfiles", "-s", help="Show all files in scope", action="store_true")
parser.add_argument("--create-dataset", "-c", help="Create data sets", action="store_true")
args = parser.parse_args() args = parser.parse_args()
scope = "LOFAR_ASTRON_GRANGE"
dataset_name = "Dataset_for_assignment_6"
if args.init: if args.init:
create_files(args.data_dir) create_files(args.data_dir)
elif args.upload: elif args.upload:
upload_files(args.data_dir) upload_files(args.data_dir, scope)
elif args.replicate:
add_replication_rule(scope)
elif args.showfiles:
showmyfiles(scope)
elif args.create_dataset:
create_dataset(scope, dataset_name)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment