From a99c97ba66cb9b42136129dbe9a4d44b5578190d Mon Sep 17 00:00:00 2001 From: Jan David Mol <mol@astron.nl> Date: Fri, 19 Aug 2016 09:20:01 +0000 Subject: [PATCH] Task #8415: Use docker-run-slurm.sh script, to contain resources and propagate kill signals in SLURM --- CEP/Pipeline/recipes/sip/pipeline.cfg.CEP4.tmpl | 2 +- MAC/Services/src/PipelineControl.py | 2 +- RTCP/Cobalt/GPUProc/src/scripts/runObservation.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CEP/Pipeline/recipes/sip/pipeline.cfg.CEP4.tmpl b/CEP/Pipeline/recipes/sip/pipeline.cfg.CEP4.tmpl index e8d8adae319..138835c5bb1 100644 --- a/CEP/Pipeline/recipes/sip/pipeline.cfg.CEP4.tmpl +++ b/CEP/Pipeline/recipes/sip/pipeline.cfg.CEP4.tmpl @@ -70,4 +70,4 @@ globalfs = yes # /bin/bash -c # # Required because the pipeline framework needs some bash functionality in the commands it starts. -cmdline = ssh -n -tt -x localhost srun --exclusive --ntasks=1 --cpus-per-task={nr_cores} --jobid={slurm_job_id} --job-name={job_name} docker run --rm -u {uid} -v /data:/data --net=host {docker_env} lofar-pipeline:${LOFAR_TAG} {command} +cmdline = ssh -n -tt -x localhost srun --exclusive --ntasks=1 --cpus-per-task={nr_cores} --jobid={slurm_job_id} --job-name={job_name} docker-run-slurm.sh --rm -u {uid} -v /data:/data --net=host {docker_env} lofar-pipeline:${LOFAR_TAG} {command} diff --git a/MAC/Services/src/PipelineControl.py b/MAC/Services/src/PipelineControl.py index 7c780af556f..b79095bc2a2 100755 --- a/MAC/Services/src/PipelineControl.py +++ b/MAC/Services/src/PipelineControl.py @@ -493,7 +493,7 @@ runcmd {setStatus_active} wget -O - -q "http://ganglia.control.lofar/ganglia/api/events.php?action=add&start_time=now&summary=Pipeline {obsid} ACTIVE&host_regex=" # run the pipeline -runcmd docker run --rm --net=host \ +runcmd docker-run-slurm.sh --rm --net=host \ -e LOFARENV={lofarenv} \ -u $UID -e USER=$USER \ -e HOME=$HOME -v $HOME/.ssh:$HOME/.ssh:ro \ diff --git a/RTCP/Cobalt/GPUProc/src/scripts/runObservation.sh b/RTCP/Cobalt/GPUProc/src/scripts/runObservation.sh index 77fef760786..2333b3230c1 100755 --- a/RTCP/Cobalt/GPUProc/src/scripts/runObservation.sh +++ b/RTCP/Cobalt/GPUProc/src/scripts/runObservation.sh @@ -344,7 +344,7 @@ if $DOCKER; then # "echo 950000 > /sys/fs/cgroup/cpu/cpu.rt_runtime_us" is needed to configure the real-time scheduler for this cgroup # --cpu-shares=24576 (=24*1024) provides this process with as much share of the CPU as 24 other containers (note: CEP4 has 24 cores/node) - OUTPUTPROC_CMDLINE="docker run --rm --cpu-shares=24576 --cap-add=sys_nice --cap-add=sys_admin -u `id -u $SSH_USER_NAME` --net=host -v $GLOBALFS_DIR:$GLOBALFS_DIR lofar-outputproc:$TAG bash -c \"sudo echo 950000 > /sys/fs/cgroup/cpu/cpu.rt_runtime_us; $OUTPUTPROC_CMDLINE\"" + OUTPUTPROC_CMDLINE="docker-run-slurm.sh --rm --cpu-shares=24576 --cap-add=sys_nice --cap-add=sys_admin -u `id -u $SSH_USER_NAME` --net=host -v $GLOBALFS_DIR:$GLOBALFS_DIR lofar-outputproc:$TAG bash -c \"sudo echo 950000 > /sys/fs/cgroup/cpu/cpu.rt_runtime_us; $OUTPUTPROC_CMDLINE\"" fi echo "[outputProc] command line = $OUTPUTPROC_CMDLINE" -- GitLab