From f6ac9ef19ca6eb0c74ee48b2eba2132f5542e474 Mon Sep 17 00:00:00 2001 From: Jan David Mol <mol@astron.nl> Date: Thu, 2 Jun 2016 09:11:48 +0000 Subject: [PATCH] Task #8887: Removed dependency on pipelineAborted.sh, allowing pipelines to go to aborted even if the docker image is not available --- .gitattributes | 1 - CEP/Pipeline/recipes/sip/CMakeLists.txt | 1 - .../recipes/sip/bin/pipelineAborted.sh | 52 ------------------- MAC/Services/src/PipelineControl.py | 25 +++++---- 4 files changed, 12 insertions(+), 67 deletions(-) delete mode 100755 CEP/Pipeline/recipes/sip/bin/pipelineAborted.sh diff --git a/.gitattributes b/.gitattributes index 32f2277e1b6..07d87d47bd6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1522,7 +1522,6 @@ CEP/Pipeline/recipes/sip/bin/long_baseline_pipeline.py eol=lf CEP/Pipeline/recipes/sip/bin/msss_calibrator_pipeline.py eol=lf CEP/Pipeline/recipes/sip/bin/msss_imager_pipeline.py eol=lf CEP/Pipeline/recipes/sip/bin/msss_target_pipeline.py eol=lf -CEP/Pipeline/recipes/sip/bin/pipelineAborted.sh eol=lf CEP/Pipeline/recipes/sip/bin/pulsar_pipeline.py -text CEP/Pipeline/recipes/sip/bin/runPipeline.sh eol=lf CEP/Pipeline/recipes/sip/bin/selfcal_imager_pipeline.py eol=lf diff --git a/CEP/Pipeline/recipes/sip/CMakeLists.txt b/CEP/Pipeline/recipes/sip/CMakeLists.txt index b034415be1d..297a997162b 100644 --- a/CEP/Pipeline/recipes/sip/CMakeLists.txt +++ b/CEP/Pipeline/recipes/sip/CMakeLists.txt @@ -74,7 +74,6 @@ lofar_add_bin_scripts( bin/pulsar_pipeline.py bin/long_baseline_pipeline.py bin/selfcal_imager_pipeline.py - bin/pipelineAborted.sh bin/runPipeline.sh bin/startPython.sh bin/startPythonVersion.sh diff --git a/CEP/Pipeline/recipes/sip/bin/pipelineAborted.sh b/CEP/Pipeline/recipes/sip/bin/pipelineAborted.sh deleted file mode 100755 index bf835676ca8..00000000000 --- a/CEP/Pipeline/recipes/sip/bin/pipelineAborted.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash -e - -# Signals a specific obs id as ABORTED -# -# The following chain is executed: -# -# setStatus(ABORTED) -# -# Syntax: -# -# runPipeline.sh -o <obsid> || pipelineAborted.sh -o <obsid> - -# ======= Defaults - -# Obs ID -OBSID= - -# Queue on which to post status changes -SETSTATUS_BUS=lofar.otdb.command - -# ======= Parse command-line parameters - -function usage() { - echo "$0 -o OBSID [options]" - echo "" - echo " -o OBSID Task identifier" - echo " -B busname Bus name to post status changes on (default: $SETSTATUS_BUS)" - exit 1 -} - -while getopts "o:c:p:B:" opt; do - case $opt in - h) usage - ;; - o) OBSID="$OPTARG" - ;; - B) SETSTATUS_BUS="$OPTARG" - ;; - \?) error "Invalid option: -$OPTARG" - ;; - :) error "Option requires an argument: -$OPTARG" - ;; - esac -done -[ -z "$OBSID" ] && usage - -# ======= Run - -# Mark as aborted -setStatus.py -o $OBSID -s aborted -B $SETSTATUS_BUS || true - -exit 0 diff --git a/MAC/Services/src/PipelineControl.py b/MAC/Services/src/PipelineControl.py index 40517b11a80..e8447d68307 100755 --- a/MAC/Services/src/PipelineControl.py +++ b/MAC/Services/src/PipelineControl.py @@ -30,7 +30,7 @@ The execution chains are as follows: [SCHEDULED] -> PipelineControl schedules - runPipeline.sh <obsid> || pipelineAborted.sh <obsid> + runPipeline.sh <obsid> || setStatus.py -o <obsid> -s aborted using two SLURM jobs, guaranteeing that pipelineAborted.sh is called in the following circumstances: @@ -49,7 +49,7 @@ The execution chains are as follows: - (wrap up) - state <- [FINISHED] -(pipelineAborted.sh) -> Calls +(setStatus.py) -> Calls - state <- [ABORTED] ----------------------------- @@ -72,6 +72,7 @@ import subprocess import datetime import os import re +from socket import getfqdn import logging logger = logging.getLogger(__name__) @@ -378,18 +379,16 @@ class PipelineControl(OTDBBusListener): # Schedule pipelineAborted.sh logger.info("Scheduling SLURM job for pipelineAborted.sh") - slurm_cancel_job_id = self.slurm.submit("%s-aborted" % parset.slurmJobName(), + slurm_cancel_job_id = self.slurm.submit("%s-abort-trigger" % parset.slurmJobName(), - "docker run --rm" - " --net=host" - " -u $UID" - " -e LOFARENV={lofarenv}" - " {image}" - " pipelineAborted.sh -o {obsid} -B {status_bus}" + "ssh {myhostname} '" + "source {lofarroot}/lofarinit.sh && " + "setStatus.py -o {obsid} -s aborted -B {status_bus}" + "'" .format( - lofarenv = os.environ.get("LOFARENV", ""), + myhostname = getfqdn(), + lofarroot = os.environ.get("LOFARROOT", ""), obsid = otdbId, - image = parset.defaultDockerImage(), status_bus = self.otdb_service_busname, ), @@ -399,8 +398,8 @@ class PipelineControl(OTDBBusListener): "--dependency=afternotok:%s" % slurm_job_id, "--kill-on-invalid-dep=yes", "--requeue", - "--error=/data/log/pipelineAborted-%s.stderr" % (otdbId,), - "--output=/data/log/pipelineAborted-%s.log" % (otdbId,), + "--error=/data/log/abort-trigger-%s.stderr" % (otdbId,), + "--output=/data/log/abort-trigger-%s.log" % (otdbId,), ] ) logger.info("Scheduled SLURM job %s" % (slurm_cancel_job_id,)) -- GitLab