Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
LOFAR
Manage
Activity
Members
Labels
Plan
Issues
Wiki
Jira issues
Open Jira
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Code review analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
RadioObservatory
LOFAR
Commits
3c31a7f1
Commit
3c31a7f1
authored
6 years ago
by
Jan David Mol
Browse files
Options
Downloads
Patches
Plain Diff
COB-4
: Disable host-key checking, to avoid stalls
parent
4c86a665
No related branches found
No related tags found
1 merge request
!6
Import cobalt2 into lofar4
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
RTCP/Cobalt/GPUProc/src/scripts/runObservation.sh
+9
-9
9 additions, 9 deletions
RTCP/Cobalt/GPUProc/src/scripts/runObservation.sh
with
9 additions
and
9 deletions
RTCP/Cobalt/GPUProc/src/scripts/runObservation.sh
+
9
−
9
View file @
3c31a7f1
...
@@ -267,17 +267,17 @@ if $SLURM; then
...
@@ -267,17 +267,17 @@ if $SLURM; then
# Allocate resources
# Allocate resources
# TODO: Start outputProc here
# TODO: Start outputProc here
ssh
$HEADNODE
srun
-N
$NRCOMPUTENODES
-c
1
--job-name
=
$OBSID
bash
-c
'while sleep 1; do :; done'
&
ssh
-o
StrictHostKeyChecking
=
no
$HEADNODE
srun
-N
$NRCOMPUTENODES
-c
1
--job-name
=
$OBSID
bash
-c
'while sleep 1; do :; done'
&
# Wait for allocation
# Wait for allocation
while
[
"
`
ssh
$HEADNODE
sacct
--starttime
=
now
--noheader
--parsable2
--format
=
state
--name
=
$OBSID
|
tail
-n
1
`
"
!=
"RUNNING"
]
;
do
sleep
1
;
done
while
[
"
`
ssh
-o
StrictHostKeyChecking
=
no
$HEADNODE
sacct
--starttime
=
now
--noheader
--parsable2
--format
=
state
--name
=
$OBSID
|
tail
-n
1
`
"
!=
"RUNNING"
]
;
do
sleep
1
;
done
# Obtain node list
# Obtain node list
NODE_LIST
=
"
`
ssh
$HEADNODE
sacct
--starttime
=
now
--noheader
--parsable2
--format
=
nodelist
--name
=
$OBSID
|
tail
-n
1
`
"
NODE_LIST
=
"
`
ssh
-o
StrictHostKeyChecking
=
no
$HEADNODE
sacct
--starttime
=
now
--noheader
--parsable2
--format
=
nodelist
--name
=
$OBSID
|
tail
-n
1
`
"
# Expand node list into something usable
# Expand node list into something usable
# TODO: move ".cep4" to cluster model
# TODO: move ".cep4" to cluster model
NODE_LIST
=
"
`
ssh
$HEADNODE
scontrol show hostnames
$NODE_LISTi
|
awk
'{ print $1 ".cep4"; }'
`
"
NODE_LIST
=
"
`
ssh
-o
StrictHostKeyChecking
=
no
$HEADNODE
scontrol show hostnames
$NODE_LISTi
|
awk
'{ print $1 ".cep4"; }'
`
"
else
else
# Derive host list from parset
# Derive host list from parset
NODE_LIST
=
$(
getOutputProcHosts
$PARSET
)
NODE_LIST
=
$(
getOutputProcHosts
$PARSET
)
...
@@ -313,7 +313,7 @@ then
...
@@ -313,7 +313,7 @@ then
fi
fi
# test the connection with local host: minimal test for valid credentials
# test the connection with local host: minimal test for valid credentials
ssh
-l
$SSH_USER_NAME
$KEY_STRING
"localhost"
"/bin/true"
||
error
"[cobalt] Failed to ssh to localhost"
ssh
-o
StrictHostKeyChecking
=
no
-l
$SSH_USER_NAME
$KEY_STRING
"localhost"
"/bin/true"
||
error
"[cobalt] Failed to ssh to localhost"
# Create a helper function for delete child processes and
# Create a helper function for delete child processes and
# a file containing the PID of these processes
# a file containing the PID of these processes
...
@@ -340,7 +340,7 @@ function clean_up {
...
@@ -340,7 +340,7 @@ function clean_up {
if
$SLURM
;
then
if
$SLURM
;
then
echo
"[children] Cancelling SLURM allocation"
echo
"[children] Cancelling SLURM allocation"
ssh
$HEADNODE
scancel
--jobname
=
$OBSID
ssh
-o
StrictHostKeyChecking
=
no
$HEADNODE
scancel
--jobname
=
$OBSID
fi
fi
echo
"[children] Sending SIGTERM"
echo
"[children] Sending SIGTERM"
...
@@ -398,7 +398,7 @@ if ! $DUMMY_RUN; then
...
@@ -398,7 +398,7 @@ if ! $DUMMY_RUN; then
else
else
for
HOST
in
$NODE_LIST
for
HOST
in
$NODE_LIST
do
do
COMMAND
=
"ssh -tt -l
$SSH_USER_NAME
$KEY_STRING
$SSH_USER_NAME
@
$HOST
$OUTPUTPROC_CMDLINE
"
COMMAND
=
"ssh -tt
-o StrictHostKeyChecking=no
-l
$SSH_USER_NAME
$KEY_STRING
$SSH_USER_NAME
@
$HOST
$OUTPUTPROC_CMDLINE
"
echo
"[outputProc] Starting
$COMMAND
"
echo
"[outputProc] Starting
$COMMAND
"
$COMMAND
&
$COMMAND
&
...
@@ -434,11 +434,11 @@ do
...
@@ -434,11 +434,11 @@ do
[
"
$h
"
==
"
`
hostname
`
"
]
&&
continue
;
[
"
$h
"
==
"
`
hostname
`
"
]
&&
continue
;
# Ignore hosts that already have the same parset (for example, through NFS).
# Ignore hosts that already have the same parset (for example, through NFS).
timeout
$KILLOPT
5s ssh
-qn
$h
"[ -f
$PARSET
] && echo
\"
$cksumline
\"
| md5sum -c --status"
&&
continue
timeout
$KILLOPT
5s ssh
-o
StrictHostKeyChecking
=
no
-qn
$h
"[ -f
$PARSET
] && echo
\"
$cksumline
\"
| md5sum -c --status"
&&
continue
# Copy parset to remote node
# Copy parset to remote node
echo
"Copying parset to
$h
:
$PARSET
"
echo
"Copying parset to
$h
:
$PARSET
"
timeout
$KILLOPT
30s scp
-Bq
$PARSET
$h
:
$PARSET
||
error
"[parset] Could not scp parset to
$h
"
timeout
$KILLOPT
30s scp
-o
StrictHostKeyChecking
=
no
-Bq
$PARSET
$h
:
$PARSET
||
error
"[parset] Could not scp parset to
$h
"
done
done
# ************************************
# ************************************
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment