From 8d121a08d54f0c86f2e0f79a37f7d4635a33a11a Mon Sep 17 00:00:00 2001 From: Jorrit Schaap <schaap@astron.nl> Date: Tue, 15 Dec 2015 12:29:53 +0000 Subject: [PATCH] Task #8725: logging, flow, error handling, etc --- LTA/LTAIngest/ltacp.py | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) mode change 100644 => 100755 LTA/LTAIngest/ltacp.py diff --git a/LTA/LTAIngest/ltacp.py b/LTA/LTAIngest/ltacp.py old mode 100644 new mode 100755 index 522f773f749..817139284ce --- a/LTA/LTAIngest/ltacp.py +++ b/LTA/LTAIngest/ltacp.py @@ -218,9 +218,10 @@ def transfer(src_host, # waiting for output, comparing checksums, etc. - logger.info('ltacp %s: waiting for transfer to finish...' % src_filename) + logger.info('ltacp %s: waiting for remote data transfer to finish...' % src_filename) output_remote_data = p_remote_data.communicate() logger.debug('ltacp %s: remote data transfer finished...' % src_filename) + logger.info('ltacp %s: waiting for remote md5 checksum transfer to finish...' % src_filename) output_remote_checksum = p_remote_checksum.communicate() logger.debug('ltacp %s: remote md5 checksum transfer finished.' % src_filename) @@ -286,46 +287,49 @@ def transfer(src_host, logger.info('ltacp %s: removing local data fifo for globus-url-copy: %s' % (src_filename, local_data_fifo)) os.remove(local_data_fifo) - logger.debug('ltacp %s: waiting for subprocesses to complete...' % src_filename) # cancel any started hanging process, as they should all be finished by now - for p,cl in started_procs.items(): - if p.poll() == None: + hanging_procs = dict((p, cl) for (p, cl) in started_procs.items() if p.poll() == None) + + if len(hanging_procs): + logger.warning('ltacp %s: terminating %d hanging subprocesses...' % (src_filename, len(hanging_procs))) + for p,cl in hanging_procs.items(): logger.warning('ltacp %s: terminated hanging process pid=%d cmdline=%s' % (src_filename, p.pid, cl)) p.terminate() + logger.info('ltacp %s: terminated %d hanging subprocesses...' % (src_filename, len(hanging_procs))) logger.info('ltacp %s: successfully completed transfer of %s:%s to %s' % (src_filename, src_host, src_path_data, dst_surl)) return (md5_checksum_local, a32_checksum_local) -# execute command and optionally return exit code or output streams -def execute(cmd, return_output=False): +# execute command and return (stdout, stderr, returncode) tuple +def execute(cmd): logger.info('executing: %s' % ' '.join(cmd)) p_cmd = Popen(cmd, stdout=PIPE, stderr=PIPE) - output_cmd = p_cmd.communicate() - if return_output: - return output_cmd - else: - return p_cmd.returncode + stdout, stderr = p_cmd.communicate() + return (stdout, stderr, p_cmd.returncode) # remove file from srm def srmrm(surl): - return execute(['srmrm', surl]) + return execute(['srmrm', surl])[2] # remove (empty) directory from srm def srmrmdir(surl): - return execute(['srmrmdir', surl]) + return execute(['srmrmdir', surl])[2] # remove file from srm def srmll(surl): - return execute(['srmls', '-l', surl], return_output=True) + return execute(['srmls', '-l', surl]) # get checksum from srm via srmls def get_srm_a32_checksum(surl): - output = srmll(surl)[0] + output, errors, code = srmll(surl) + + if code != 0: + return False if not 'Checksum type:' in output: return False @@ -348,7 +352,7 @@ def create_missing_directories(surl): # determine missing dirs while parent: - code = execute(['srmls', parent]) + code = execute(['srmls', parent])[2] if code == 0: logger.info('ltacp %s: srmls returned successfully, so this path apparently exists: %s' % parent) break; @@ -359,7 +363,7 @@ def create_missing_directories(surl): # recreate missing dirs while len(missing) > 0: parent = parent + '/' + missing.pop() - code = execute(['srmmkdir',"-retry_num=0",parent]) + code = execute(['srmmkdir',"-retry_num=0",parent])[2] if code != 0: logger.info('ltacp %s: failed to create missing directory: %s' % parent) return code @@ -372,6 +376,10 @@ def create_missing_directories(surl): # usage: ltacp.py <remote-host> <remote-path> <surl> if __name__ == '__main__': + if len(sys.argv) < 4: + print 'example: ./ltacp.py 10.196.232.11 /home/users/ingest/1M.txt srm://lofar-srm.fz-juelich.de:8443/pnfs/fz-juelich.de/data/lofar/ops/test/eor/1M.txt' + sys.exit() + # transfer test: transfer(sys.argv[1], sys.argv[2], sys.argv[3], 'ingest') -- GitLab