From f24d3003be4c29af3acdda6ed8beb9014e84c1f9 Mon Sep 17 00:00:00 2001
From: Stefan Froehlich <s.froehlich@fz-juelich.de>
Date: Mon, 21 Sep 2015 11:39:03 +0000
Subject: [PATCH] Task #8440: added srun command as it is on jureca right now. left out the jureca fix for hostnames.

---
 .../framework/lofarpipe/support/remotecommand.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/CEP/Pipeline/framework/lofarpipe/support/remotecommand.py b/CEP/Pipeline/framework/lofarpipe/support/remotecommand.py
index a6c0439fb94..2713ffe96f5 100644
--- a/CEP/Pipeline/framework/lofarpipe/support/remotecommand.py
+++ b/CEP/Pipeline/framework/lofarpipe/support/remotecommand.py
@@ -92,9 +92,37 @@ def run_remote_command(config, logger, host, command, env, arguments = None):
         return run_via_mpiexec(logger, command, arguments, host)
     elif method == "cep_mpi":
         return run_via_mpiexec_cep(logger, command, arguments, host)
+    elif method == "slurm_srun_cep3":
+        return run_via_slurm_srun_cep3(logger, command, arguments, host)
     else:
         return run_via_ssh(logger, host, command, env, arguments)
 
+def run_via_slurm_srun_cep3(logger, command, arguments, host):
+    """
+    Dispatch a remote command via SLURM's srun.
+
+    The command and its arguments are joined into one string that srun
+    hands to /bin/sh on ``host``; ``hostname`` is run first, and the
+    command only runs if that succeeds. ``arguments`` may be None or
+    empty, in which case the bare command is run.
+
+    Returns the object produced by spawn_process, with its ``kill``
+    attribute replaced by a callable that sends SIGKILL to the process.
+    """
+    # run_remote_command() defaults arguments to None; treat that as "no
+    # arguments" instead of failing with a TypeError when iterating.
+    command = " ".join([command] + [str(arg) for arg in arguments or []])
+    commandstring = [
+        "srun", "-N", "1", "--cpu_bind=map_cpu:none", "-w", host,
+        "/bin/sh", "-c", "hostname && " + command,
+    ]
+    # NOTE: there is a bug that crashes jobs when too many get started at
+    # the same time. A temporary, NOT 100% reliable workaround is to sleep
+    # for a random 0-10 seconds here before spawning the process.
+    process = spawn_process(commandstring, logger)
+    process.kill = lambda: os.kill(process.pid, signal.SIGKILL)
+    return process
+
 def run_via_mpirun(logger, host, command, environment, arguments):
     """
     Dispatch a remote command via mpirun.
-- 
GitLab