Skip to content
Snippets Groups Projects

update submit script to accommodate idols data

Merged Maaijke Mevius requested to merge update_for_idols into main
1 file
+ 25
13
Compare changes
  • Side-by-side
  • Inline
@@ -26,22 +26,32 @@ def parse_args():
@@ -26,22 +26,32 @@ def parse_args():
parser.add_argument('filter', help='filter string', type=str)
parser.add_argument('filter', help='filter string', type=str)
parser.add_argument('--averaging_window', help='size of window in s', type=float, default=1)
parser.add_argument('--averaging_window', help='size of window in s', type=float, default=1)
parser.add_argument('--stations', help='list of stations', type=str, nargs="+")
parser.add_argument('--stations', help='list of stations', type=str, nargs="+")
 
parser.add_argument('--sap_id', help='specify sapid', type=int)
 
parser.add_argument('--obs_filter', help='specify specific observation string (e.g. B000_S0)')
 
parser.add_argument('--dynspec', help='only select data in dynspec format',action='store_true')
parser.add_argument('sas_ids', help='list of SAS IDS', nargs='+')
parser.add_argument('sas_ids', help='list of SAS IDS', nargs='+')
return parser.parse_args()
return parser.parse_args()
logger = logging.getLogger('Solar query LTA')
logger = logging.getLogger('Solar query LTA')
def create_query_string(obs_filter=None, sap_id=None, dynspec=False):
    """Build the AWOPER LTA SQL query template for one SAS id.

    The returned string still contains a ``{sasid}`` placeholder; callers
    fill it in with ``query.format(sasid=...)`` before executing.

    Parameters
    ----------
    obs_filter : str, optional
        Observation substring (e.g. ``"B000_S0"``) to match in the file URL.
    sap_id : int, optional
        Restrict to a single sub-array pointing; matched as ``SAP%03d`` in
        the URL.
    dynspec : bool, optional
        If True, only select files whose URL marks them as dynamic-spectrum
        data (``/LOFAR_DYNSPEC``).

    Returns
    -------
    str
        SQL query template with a ``{sasid}`` format placeholder.
    """
    QUERY_STR = '''
SELECT FO.OBJECT_ID AS OBJECT_ID,
FO.FILESIZE AS FILESIZE,
ST.OBS_ID as OBS_ID,
FO."+PROJECT" AS PROJECT,
STR.PRIMARY_URL AS PRIMARY_URL FROM AWOPER.FILEOBJECT FO JOIN
AWOPER.STORAGETICKETRESOURCE STR ON FO.STORAGE_TICKET_RESOURCE=STR.OBJECT_ID JOIN
AWOPER.STORAGETICKET ST ON ST.OBJECT_ID=STR.TICKET
WHERE OBS_ID = '{sasid}' '''
    if dynspec:
        QUERY_STR += '''AND STR.PRIMARY_URL LIKE '%/LOFAR_DYNSPEC%' '''
    # NOTE(review): obs_filter and sap_id come from CLI arguments and are
    # interpolated directly into the SQL text (f-string, not bind
    # parameters) — a SQL-injection risk if this script is ever driven by
    # untrusted input. Consider parameterized queries.
    if obs_filter is not None:
        QUERY_STR += f'''AND STR.PRIMARY_URL LIKE '%{obs_filter}%' '''
    if sap_id is not None:
        QUERY_STR += f'''AND STR.PRIMARY_URL LIKE '%SAP{sap_id:03d}%' '''
    return QUERY_STR
def read_user_credentials(file_paths: Union[str, List[str]]) -> Dict:
def read_user_credentials(file_paths: Union[str, List[str]]) -> Dict:
@@ -68,16 +78,18 @@ def create_db_engine(user: str, password: str):
@@ -68,16 +78,18 @@ def create_db_engine(user: str, password: str):
return create_engine(connection_string)
return create_engine(connection_string)
def find_surl_and_size_per_sasid(engine, sasid, QUERY_STR):
    """Query the LTA database for all file entries of one SAS id.

    Parameters
    ----------
    engine : database connectable
        SQLAlchemy engine (or DBAPI connection) accepted by
        ``pandas.read_sql_query``.
    sasid : int
        SAS (observation) id substituted into the query template.
    QUERY_STR : str
        SQL template with a ``{sasid}`` placeholder, as produced by
        ``create_query_string``.

    Returns
    -------
    pandas.DataFrame
        One row per file object (object id, size, obs id, project, URL —
        as selected by the query template).
    """
    # Format once; the diff's version called .format() twice and had an
    # unbalanced parenthesis in the logging call (syntax error) — fixed here.
    query = QUERY_STR.format(sasid=sasid)
    logging.info('querying %s', query)  # lazy %-args per logging convention
    result_df = pandas.read_sql_query(query, engine)
    return result_df
def parse_surl_for_info(surl):
def parse_surl_for_info(surl):
 
logging.info(("parsing",surl)
parsed = urlparse(surl)
parsed = urlparse(surl)
site = parsed.hostname
site = parsed.hostname
path = parsed.path
path = parsed.path
pattern = r'^.*/projects\/(?P<project>.*_\d*)\/(?P<sas_id>\w\d*)\/'
pattern = r'^.*/projects\/(?P<project>.*\d*)\/(?P<sas_id>\w\d*)\/'
groups = re.match(pattern, path).groupdict()
groups = re.match(pattern, path).groupdict()
result = dict()
result = dict()
@@ -145,11 +157,11 @@ def main():
@@ -145,11 +157,11 @@ def main():
atdb_parameters = read_atdb_parameters(DEFAULT_CONFIG_PATHS)
atdb_parameters = read_atdb_parameters(DEFAULT_CONFIG_PATHS)
atdb_interface = AInterface(atdb_parameters['url'], atdb_parameters['token'])
atdb_interface = AInterface(atdb_parameters['url'], atdb_parameters['token'])
engine = create_db_engine(**credentials)
engine = create_db_engine(**credentials)
 
QUERY_STR = create_query_string(obs_filter=args.obs_filter,sap_id=args.sap_id,dynspec=args.dynspec)
for sas_id in sas_ids:
for sas_id in sas_ids:
sas_id = int(sas_id)
sas_id = int(sas_id)
logger.info('fetching surl for sas_id %s', sas_id)
logger.info('fetching surl for sas_id %s', sas_id)
table = find_surl_and_size_per_sasid(engine, sas_id,QUERY_STR)
table = find_surl_and_size_per_sasid(engine, sas_id)
for index, line in table.iterrows():
for index, line in table.iterrows():
logging.info('creating task for sas_id %s - dataset %s', sas_id, index)
logging.info('creating task for sas_id %s - dataset %s', sas_id, index)
payload = create_payload_from_entry(line, args.filter, args.workflow, args.averaging_window, args.stations)
payload = create_payload_from_entry(line, args.filter, args.workflow, args.averaging_window, args.stations)
Loading