From ea224ae9b947b0a0bdd2ffdf959b3db675c001a3 Mon Sep 17 00:00:00 2001
From: Jan David Mol <mol@astron.nl>
Date: Mon, 20 Jun 2016 13:03:02 +0000
Subject: [PATCH] Task #9192: Run pipelines with lower priority (than
 inspection plots etc), prevent cancellation of docker pull/tag jobs,
 propagate runPipeline exit code

---
 MAC/Services/src/PipelineControl.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/MAC/Services/src/PipelineControl.py b/MAC/Services/src/PipelineControl.py
index 40ab25b3442..1a751d8fc99 100755
--- a/MAC/Services/src/PipelineControl.py
+++ b/MAC/Services/src/PipelineControl.py
@@ -314,6 +314,9 @@ class PipelineControl(OTDBBusListener):
 
                      # Maximum run time for job (31 days)
                      "--time=31-0",
+
+                     # Raise the nice value so pipelines are scheduled below inspection plots
+                     "--nice=1000",
                    
                      "--partition=%s" % parset.processingPartition(),
                      "--nodes=%s" % parset.processingNumberOfTasks(),
@@ -344,11 +347,11 @@ class PipelineControl(OTDBBusListener):
       "{setStatus_active}\n"
       # pull docker image from repository on all nodes
       "srun --nodelist=$SLURM_NODELIST --cpus-per-task=1 --job-name=docker-pull"
-        " --no-kill"
+        " --kill-on-bad-exit=0 --wait=0"
         " docker pull {repository}/{image}\n"
       # put a local tag on the pulled image
       "srun --nodelist=$SLURM_NODELIST --cpus-per-task=1 --job-name=docker-tag"
-        " --no-kill"
+        " --kill-on-bad-exit=0 --wait=0"
         " docker tag -f {repository}/{image} {image}\n"
       # call runPipeline.sh in the image on this node
       "docker run --rm"
@@ -361,7 +364,7 @@ class PipelineControl(OTDBBusListener):
         " -e SLURM_JOB_ID=$SLURM_JOB_ID"
         " -v /data:/data"
         " {image}"
-        " runPipeline.sh -o {obsid} -c /opt/lofar/share/pipeline/pipeline.cfg.{cluster}\n"
+        " runPipeline.sh -o {obsid} -c /opt/lofar/share/pipeline/pipeline.cfg.{cluster} || exit $?\n"
 
         # notify that we're tearing down
         "{setStatus_completing}\n"
-- 
GitLab