Skip to content
Snippets Groups Projects
Commit b4967400 authored by Jorrit Schaap's avatar Jorrit Schaap
Browse files

Task #9607: always set status to completing before aborted/finished, which...

Task #9607: always set status to completing before aborted/finished, which solves the aborted trigger race condition
parent 0458c306
No related branches found
No related tags found
No related merge requests found
...@@ -497,13 +497,12 @@ runcmd docker-run-slurm.sh --rm --net=host \ ...@@ -497,13 +497,12 @@ runcmd docker-run-slurm.sh --rm --net=host \
-v /data:/data \ -v /data:/data \
{image} \ {image} \
runPipeline.sh -o {obsid} -c /opt/lofar/share/pipeline/pipeline.cfg.{cluster} -P {parset_dir} runPipeline.sh -o {obsid} -c /opt/lofar/share/pipeline/pipeline.cfg.{cluster} -P {parset_dir}
RESULT=$? RESULT=$?
if [ $RESULT -eq 0 ]; then # notify that we're tearing down
# notify that we're tearing down runcmd {setStatus_completing}
runcmd {setStatus_completing}
if [ $RESULT -eq 0 ]; then
# wait for MoM to pick up feedback before we set finished status # wait for MoM to pick up feedback before we set finished status
runcmd sleep 60 runcmd sleep 60
...@@ -512,17 +511,8 @@ if [ $RESULT -eq 0 ]; then ...@@ -512,17 +511,8 @@ if [ $RESULT -eq 0 ]; then
# notify ganglia # notify ganglia
wget -O - -q "http://ganglia.control.lofar/ganglia/api/events.php?action=add&start_time=now&summary=Pipeline {obsid} FINISHED&host_regex=" wget -O - -q "http://ganglia.control.lofar/ganglia/api/events.php?action=add&start_time=now&summary=Pipeline {obsid} FINISHED&host_regex="
else
# notify system that we've aborted
runcmd {setStatus_aborted}
# notify ganglia
wget -O - -q "http://ganglia.control.lofar/ganglia/api/events.php?action=add&start_time=now&summary=Pipeline {obsid} ABORTED&host_regex="
fi fi
# remove the abort-trigger job
scancel --jobname={obsid}-abort-trigger
# report status back to SLURM # report status back to SLURM
echo "Pipeline exited with status $RESULT" echo "Pipeline exited with status $RESULT"
exit $RESULT exit $RESULT
...@@ -537,7 +527,6 @@ exit $RESULT ...@@ -537,7 +527,6 @@ exit $RESULT
setStatus_active = setStatus_cmdline("active"), setStatus_active = setStatus_cmdline("active"),
setStatus_completing = setStatus_cmdline("completing"), setStatus_completing = setStatus_cmdline("completing"),
setStatus_finished = setStatus_cmdline("finished"), setStatus_finished = setStatus_cmdline("finished"),
setStatus_aborted = setStatus_cmdline("aborted"),
), ),
sbatch_params=sbatch_params sbatch_params=sbatch_params
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment