Skip to content
Snippets Groups Projects
Commit 3c31a7f1 authored by Jan David Mol
Browse files

COB-4: Disable host-key checking, to avoid stalls

parent 4c86a665
No related branches found
No related tags found
1 merge request: !6 "Import cobalt2 into lofar4"
......@@ -267,17 +267,17 @@ if $SLURM; then
# Allocate resources
# TODO: Start outputProc here
ssh $HEADNODE srun -N $NRCOMPUTENODES -c 1 --job-name=$OBSID bash -c 'while sleep 1; do :; done' &
ssh -o StrictHostKeyChecking=no $HEADNODE srun -N $NRCOMPUTENODES -c 1 --job-name=$OBSID bash -c 'while sleep 1; do :; done' &
# Wait for allocation
while [ "`ssh $HEADNODE sacct --starttime=now --noheader --parsable2 --format=state --name=$OBSID | tail -n 1`" != "RUNNING" ]; do sleep 1; done
while [ "`ssh -o StrictHostKeyChecking=no $HEADNODE sacct --starttime=now --noheader --parsable2 --format=state --name=$OBSID | tail -n 1`" != "RUNNING" ]; do sleep 1; done
# Obtain node list
NODE_LIST="`ssh $HEADNODE sacct --starttime=now --noheader --parsable2 --format=nodelist --name=$OBSID | tail -n 1`"
NODE_LIST="`ssh -o StrictHostKeyChecking=no $HEADNODE sacct --starttime=now --noheader --parsable2 --format=nodelist --name=$OBSID | tail -n 1`"
# Expand node list into something usable
# TODO: move ".cep4" to cluster model
NODE_LIST="`ssh $HEADNODE scontrol show hostnames $NODE_LISTi | awk '{ print $1 ".cep4"; }'`"
NODE_LIST="`ssh -o StrictHostKeyChecking=no $HEADNODE scontrol show hostnames $NODE_LISTi | awk '{ print $1 ".cep4"; }'`"
else
# Derive host list from parset
NODE_LIST=$(getOutputProcHosts $PARSET)
......@@ -313,7 +313,7 @@ then
fi
# test the connection with local host: minimal test for valid credentials
ssh -l $SSH_USER_NAME $KEY_STRING "localhost" "/bin/true" || error "[cobalt] Failed to ssh to localhost"
ssh -o StrictHostKeyChecking=no -l $SSH_USER_NAME $KEY_STRING "localhost" "/bin/true" || error "[cobalt] Failed to ssh to localhost"
# Create a helper function for delete child processes and
# a file containing the PID of these processes
......@@ -340,7 +340,7 @@ function clean_up {
if $SLURM; then
echo "[children] Cancelling SLURM allocation"
ssh $HEADNODE scancel --jobname=$OBSID
ssh -o StrictHostKeyChecking=no $HEADNODE scancel --jobname=$OBSID
fi
echo "[children] Sending SIGTERM"
......@@ -398,7 +398,7 @@ if ! $DUMMY_RUN; then
else
for HOST in $NODE_LIST
do
COMMAND="ssh -tt -l $SSH_USER_NAME $KEY_STRING $SSH_USER_NAME@$HOST $OUTPUTPROC_CMDLINE"
COMMAND="ssh -tt -o StrictHostKeyChecking=no -l $SSH_USER_NAME $KEY_STRING $SSH_USER_NAME@$HOST $OUTPUTPROC_CMDLINE"
echo "[outputProc] Starting $COMMAND"
$COMMAND &
......@@ -434,11 +434,11 @@ do
[ "$h" == "`hostname`" ] && continue;
# Ignore hosts that already have the same parset (for example, through NFS).
timeout $KILLOPT 5s ssh -qn $h "[ -f $PARSET ] && echo \"$cksumline\" | md5sum -c --status" && continue
timeout $KILLOPT 5s ssh -o StrictHostKeyChecking=no -qn $h "[ -f $PARSET ] && echo \"$cksumline\" | md5sum -c --status" && continue
# Copy parset to remote node
echo "Copying parset to $h:$PARSET"
timeout $KILLOPT 30s scp -Bq $PARSET $h:$PARSET || error "[parset] Could not scp parset to $h"
timeout $KILLOPT 30s scp -o StrictHostKeyChecking=no -Bq $PARSET $h:$PARSET || error "[parset] Could not scp parset to $h"
done
# ************************************
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment