From bd74530ac3a6551a28a9c4430ee75c0055903dfc Mon Sep 17 00:00:00 2001 From: Jan David Mol <mol@astron.nl> Date: Thu, 25 Aug 2016 08:37:23 +0000 Subject: [PATCH] Task #9682: Fix SLURM job status if failed, and avoid requeueing of pipeline job on node failure because SLURM messes up its administration (mixes old and new job, makes job unfindable by jobname, triggers abort-trigger job anyway) --- MAC/Services/src/PipelineControl.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/MAC/Services/src/PipelineControl.py b/MAC/Services/src/PipelineControl.py index 851c0701aa3..ffee99a53ac 100755 --- a/MAC/Services/src/PipelineControl.py +++ b/MAC/Services/src/PipelineControl.py @@ -424,9 +424,6 @@ class PipelineControl(OTDBBusListener): # Enforce the dependencies, instead of creating lingering jobs "--kill-on-invalid-dep=yes", - # Restart job if a node fails - "--requeue", - # Maximum run time for job (31 days) "--time=31-0", @@ -471,11 +468,11 @@ function runcmd {{ PID=$! wait $PID # returns the exit status of "wait" if interrupted wait $PID # returns the exit status of $PID - RESULT=$? + CMDRESULT=$? trap - SIGTERM SIGINT - return $RESULT + return $CMDRESULT }} # print some info -- GitLab