Skip to content
Snippets Groups Projects
Commit fbbf87bb authored by Auke Klazema
Browse files

Task SW-581: Cherry Picked revision 40995 from trunk to release to have the...

Task SW-581: Cherry Picked revision 40995 from trunk to release to have the changes in the cobalt_functions.sh in production
parent 8f628477
No related branches found
Tags LOFAR-Release-3_2_6
No related merge requests found
......@@ -54,29 +54,57 @@ function read_cluster_model {
HEADNODE=head.cep4.control.lofar
SLURM_PARTITION=cpu
SLURM_RESERVATION=cobalt
# Get the nodes in the cobalt reservation. The reservation must be active.
RESVNODES=$(ssh $HEADNODE scontrol show res -o $SLURM_RESERVATION | perl -n -e 'm/Nodes=(.*?) .*State=ACTIVE/ ? print STDOUT $1 : die "No active cobalt reservation found"')
if [ $? -eq 0 ]; then
echo "Active reservation '$SLURM_RESERVATION' found, get online nodes in the reservation"
SINFO_FLAGS="--responding --states=idle,mixed,alloc,reserved -n $RESVNODES"
else
echo "WARNING: No reservation '$SLURM_RESERVATION' found, defaulting to all online nodes in partition '$SLURM_PARTITION'"
SINFO_FLAGS="--responding --states=idle,mixed,alloc --partition=$SLURM_PARTITION"
RESVCACHE=$LOFARROOT/var/run/slurmresv.cache
COMPCACHE=$LOFARROOT/var/run/compnodes.cache
# Get the reserved CEP4 nodes for output writing. Try three methods in order of precedence:
# 1. Get nodes from the cobalt slurm reservation (must have state active)
# 2. Read a cache file with the node list
# 3. Default to a particular set of nodes
echo "Reading the slurm '$SLURM_RESERVATION' reservation.."
RESVNODES=$(ssh $HEADNODE scontrol show res -o $SLURM_RESERVATION | \
perl -n -e 'm/Nodes=(.*?) .*State=ACTIVE/ ? print STDOUT $1 : die "WARNING: No active reservation found\n"')
if [ -n "$RESVNODES" ]; then
# save in cache
cat <<-CAT > $RESVCACHE
echo "Cache created at $(date)"
RESVNODES="$RESVNODES"
CAT
elif [ -s $RESVCACHE ]; then
echo "Reading the cache file '$RESVCACHE'"
source $RESVCACHE
else
echo "WARNING: No reserved nodes and no cache file found, using defaults"
RESVNODES="cpu[40-44]"
fi
echo "Reserved nodes: $RESVNODES"
# Checking online status: try three methods in order of precedence:
# 1. Check slurm for the node status (sinfo)
# 2. Read a cache file with the node list
# 3. Default to a particular set of nodes
echo "Checking online status"
SINFO_FLAGS="--responding --states=idle,mixed,alloc,reserved -n $RESVNODES"
COMPUTENODES="$(ssh $HEADNODE sinfo --format=%n.cep4.infiniband.lofar,%T --noheader --sort=N $SINFO_FLAGS | fgrep -v ,draining | cut -f1 -d,)"
# OLD COMPUTENODES="`ssh $HEADNODE sinfo --responding --states=idle,mixed,alloc --format=%n.cep4.infiniband.lofar,%T --noheader --partition=$SLURM_PARTITION --sort=N | fgrep -v ,draining | cut -f1 -d,`"
if [ -z "$COMPUTENODES" ]; then
echo "ERROR: Could not obtain list of available CEP4 nodes. Defaulting to all."
COMPUTENODES="`seq -f "cpu%02.0f.cep4.infiniband.lofar" 1 47`"
if [ -n "$COMPUTENODES" ]; then
# save in cache
cat <<-CAT > $COMPCACHE
echo "Cache created at $(date)"
COMPUTENODES="$COMPUTENODES"
CAT
elif [ -s $COMPCACHE ]; then
echo "Reading the cache file '$COMPCACHE'"
source $COMPCACHE
else
echo "WARNING: No active nodes and no cache file found, using defaults"
COMPUTENODES="`seq -f "cpu%02.0f.cep4.infiniband.lofar" 40 44`"
fi
echo -e "Nodes used for output writing:\n${COMPUTENODES}"
GLOBALFS_DIR=/data
#SLURM=true
SLURM=false # Don't use SLURM for now, let's get it working without it first
GLOBALFS=true
DOCKER=false # disabled as outputproc is too slow on docker 1.9.1 (#9522)
GLOBALFS_DIR=/data
SLURM=false # Don't use SLURM for now, let's get it working without it first
DOCKER=false # disabled as outputproc is too slow on docker 1.9.1 (#9522)
OUTPUTPROC_ROOT="`echo '/opt/outputproc-${LOFAR_TAG}' | docker-template`"
;;
DRAGNET)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment