diff --git a/.gitattributes b/.gitattributes index bd1433f810886f2ed286ed767c533c6f9b7f908f..49612bd879076a358fd6ff1ed0740c3dee1658b9 100644 --- a/.gitattributes +++ b/.gitattributes @@ -4552,6 +4552,7 @@ SubSystems/Online_Cobalt/validation/cluster/c3/cexec -text SubSystems/Online_Cobalt/validation/cluster/connectivity/cobalt2cep.test -text SubSystems/Online_Cobalt/validation/cluster/connectivity/cobalt2cobalt.test eol=lf SubSystems/Online_Cobalt/validation/cluster/connectivity/cobalt2locus.test -text +SubSystems/Online_Cobalt/validation/cluster/funcs.sh eol=lf SubSystems/Online_Cobalt/validation/cobalt/casacore/meastable.test -text SubSystems/Online_Cobalt/validation/system/gpu/basic-gpu.test -text SubSystems/Online_Cobalt/validation/system/gpu/persistence-mode.test -text diff --git a/SubSystems/Online_Cobalt/validation/cluster/connectivity/cobalt2cep.test b/SubSystems/Online_Cobalt/validation/cluster/connectivity/cobalt2cep.test index c188f8a70090bdfc69376a276bca7f24a53d0fc4..15c8931188f02764a08d6d08cd5098f3ae79d9a7 100755 --- a/SubSystems/Online_Cobalt/validation/cluster/connectivity/cobalt2cep.test +++ b/SubSystems/Online_Cobalt/validation/cluster/connectivity/cobalt2cep.test @@ -7,29 +7,18 @@ # # $Id$ -# Handle common signals -trap 'exit 1' 1 2 3 15 +. $(dirname $0)/../funcs.sh -host=$(hostname) +# Setup signal handler. +trap 'print_status; exit' 1 2 3 15 + +# List of crucial CEP systems that must be online TARGETS="lcs015 ccu001 kis001 lhn001 sasdb" +host=$(hostname) status=0 for target in $TARGETS do - printf "%-20s " "$host --> $target" - timeout 1 ping -c 1 $target > /dev/null 2>&1 - sts=$? - if [[ $sts -ne 0 ]] - then - status=1 - if [[ $sts -eq 124 ]] - then - echo "TIMEOUT" - else - echo "FAILED" - fi - else - echo "OK" - fi + run_command -q "ping -c 1 $target" 1 || status=1 done exit $status diff --git a/SubSystems/Online_Cobalt/validation/cluster/connectivity/cobalt2cobalt.test b/SubSystems/Online_Cobalt/validation/cluster/connectivity/cobalt2cobalt.test index 0989e1ad8707fc65b8f2f495d5e8a833e7ded8cd..4d62fe2cf08c0fe6bcc9077e621b40511fd0c733 100755 --- a/SubSystems/Online_Cobalt/validation/cluster/connectivity/cobalt2cobalt.test +++ b/SubSystems/Online_Cobalt/validation/cluster/connectivity/cobalt2cobalt.test @@ -1,31 +1,19 @@ -#!/bin/bash +#!/bin/sh # # Test connectivity from Cobalt to Cobalt. # # $Id$ -# Handle common signals -trap 'exit 1' 1 2 3 15 +. $(dirname $0)/../funcs.sh + +# Setup signal handler. +trap 'print_status; exit' 1 2 3 15 host=$(hostname) status=0 -for ((i=1; i<9; i++)) +for i in $(seq 1 9) do target=$(printf cbm%03d $i) - echo -n "$host --> $target " - timeout 1 ssh $target /bin/true > /dev/null 2>&1 - sts=$? - if [[ $sts -ne 0 ]] - then - status=1 - if [[ $sts -eq 124 ]] - then - echo "TIMEOUT" - else - echo "FAILED" - fi - else - echo "OK" - fi + run_command "ssh $target /bin/true" 1 || status=1 done -exit $status \ No newline at end of file +exit $status diff --git a/SubSystems/Online_Cobalt/validation/cluster/connectivity/cobalt2locus.test b/SubSystems/Online_Cobalt/validation/cluster/connectivity/cobalt2locus.test index ecd81f4f751e8a19877ae2979a69501b81c582fe..aa46e3e3e1e0e7c54bc826c4b1a13b16e108f1cd 100755 --- a/SubSystems/Online_Cobalt/validation/cluster/connectivity/cobalt2locus.test +++ b/SubSystems/Online_Cobalt/validation/cluster/connectivity/cobalt2locus.test @@ -1,4 +1,4 @@ -#!/bin/sh -xe +#!/bin/sh # # Test connectivity from Cobalt to the Locus nodes locus001..locus100 # @@ -9,32 +9,36 @@ # # $Id$ -# Handle common signals -trap 'rm -f -- $C3_CONF; exit' 0 1 2 3 15 +# Source useful functions. +. $(dirname $0)/../funcs.sh -# Execute all remote commands with a timeout. Use a decent timeout (15 sec); -# some locus nodes may be sluggish when running pipelines -TIMEOUT="timeout 15" +# Path to our local cexec program. +CEXEC=$(cd $(dirname $0)/../c3 && pwd)/cexec -# Define SSH, SCP -SSH="$TIMEOUT ssh" -SCP="$TIMEOUT scp" -CEXEC="$TIMEOUT $(cd ../c3 && pwd)/cexec" +# Setup cleanup handler. +trap 'STATUS=$?; rm -rf -- "$TMPDIR"; exit $STATUS' 0 1 2 3 15 -# Get the location of cexec on lhn001 -C3_PATH=$(dirname $($SSH lhn001 which cexec)) +# Create temporary directory for output files +TMPDIR=$(mktemp -dt "$(basename $0).XXXXXX") || exit -# Location of our temporary c3.conf file -C3_CONF=$(tempfile -p "c3." -s ".conf") +# Filenames for our output files. +C3_CONF=$TMPDIR/c3.conf +LHN_LOCUS=$TMPDIR/lhn.locus +CBM_LOCUS=$TMPDIR/cbm.locus + +# Path to remote cexec command +C3_PATH=$(dirname $(run_command "ssh lhn001 which cexec")) || exit # Retrieve the c3.conf file from lhn001. -$SCP lhn001:/etc/c3.conf $C3_CONF +run_command "scp lhn001:/etc/c3.conf $C3_CONF" || exit # Retrieve the list of locus nodes reachable from lhn001 -LHN_LOCUS=$($SSH lhn001 cexec locus: hostname | grep '^locus') +run_command "ssh lhn001 cexec locus: hostname" | \ + grep '^locus' > $LHN_LOCUS || exit # Retrieve the list of locus nodes reachable from localhost -CBM_LOCUS=$(C3_PATH=$C3_PATH $CEXEC -f $C3_CONF locus: hostname | grep '^locus') +C3_PATH=$C3_PATH run_command "$CEXEC -f $C3_CONF locus: hostname" | \ + grep '^locus' > $CBM_LOCUS || exit # Compare the results. -[ "$LHN_LOCUS" = "$CBM_LOCUS" ] +diff -s $LHN_LOCUS $CBM_LOCUS diff --git a/SubSystems/Online_Cobalt/validation/cluster/funcs.sh b/SubSystems/Online_Cobalt/validation/cluster/funcs.sh new file mode 100644 index 0000000000000000000000000000000000000000..b17511272ecf6c01d72602d6f0947dec11bb81f1 --- /dev/null +++ b/SubSystems/Online_Cobalt/validation/cluster/funcs.sh @@ -0,0 +1,45 @@ +# +# Useful functions that can be used by the cluster test scripts +# + +# Signal handler function. Prints exit status and returns it. +print_status() +{ + STATUS=$? + case $STATUS in + 0) + echo >&2 "OK" ;; + 124) + echo >&2 "TIMEOUT" ;; + 129|13[0-9]|14[0-3]) + echo >&2 "SIGNALLED ($STATUS)" ;; + *) + echo >&2 "ERROR ($STATUS)" ;; + esac + return $STATUS +} + +# Run a command with a timeout. +# +# Usage: run_command [options] "command" [timeout] +# +# Valid options: +# -q (quiet), redirect stdout to /dev/null +# +# The timeout period is given in seconds and defaults to 15. +run_command() +{ + while getopts "q" opt + do + case $opt in + q) exec 1>/dev/null ;; + esac + done + shift $((OPTIND-1)) + COMMAND="$1" + TIMEOUT="${2:-15}" # default timeout: 15 seconds + echo -n "$COMMAND: " >&2 + timeout -k1 $TIMEOUT $COMMAND 2> /dev/null & + wait $! 2> /dev/null + print_status +} \ No newline at end of file