Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
LOFAR
Manage
Activity
Members
Labels
Plan
Issues
Wiki
Jira issues
Open Jira
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Code review analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
RadioObservatory
LOFAR
Commits
2f9d3cc6
Commit
2f9d3cc6
authored
9 years ago
by
Jan David Mol
Browse files
Options
Downloads
Patches
Plain Diff
Task #8443: Clearer log output
parent
8eccc545
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
RTCP/Cobalt/GPUProc/src/scripts/runObservation.sh
+26
-27
26 additions, 27 deletions
RTCP/Cobalt/GPUProc/src/scripts/runObservation.sh
with
26 additions
and
27 deletions
RTCP/Cobalt/GPUProc/src/scripts/runObservation.sh
+
26
−
27
View file @
2f9d3cc6
...
@@ -89,11 +89,11 @@ function sendback_state {
...
@@ -89,11 +89,11 @@ function sendback_state {
if
[
$OBSRESULT
-eq
0
]
if
[
$OBSRESULT
-eq
0
]
then
then
echo
"Signalling success"
echo
"
[cobalt]
Signalling success"
SUCCESS
=
1
SUCCESS
=
1
else
else
# ***** Observation or sending feedback failed for some reason
# ***** Observation or sending feedback failed for some reason
echo
"Signalling failure"
echo
"
[cobalt]
Signalling failure"
SUCCESS
=
0
SUCCESS
=
0
fi
fi
...
@@ -182,8 +182,8 @@ PARSET="$1"
...
@@ -182,8 +182,8 @@ PARSET="$1"
[
-n
"
$PARSET
"
]
||
usage
[
-n
"
$PARSET
"
]
||
usage
# Check if LOFARROOT is set.
# Check if LOFARROOT is set.
[
-n
"
$LOFARROOT
"
]
||
error
"LOFARROOT is not set!"
[
-n
"
$LOFARROOT
"
]
||
error
"
[cobalt]
LOFARROOT is not set!"
echo
"LOFARROOT
is set to
$LOFARROOT
"
echo
"
[cobalt]
LOFARROOT
=
$LOFARROOT
"
# ******************************
# ******************************
# Preprocess: initialise
# Preprocess: initialise
...
@@ -203,10 +203,10 @@ fi
...
@@ -203,10 +203,10 @@ fi
timeout
-k2
1 /bin/true 2> /dev/null
&&
KILLOPT
=
-k2
timeout
-k2
1 /bin/true 2> /dev/null
&&
KILLOPT
=
-k2
# Read parset
# Read parset
[
-f
"
$PARSET
"
-a
-r
"
$PARSET
"
]
||
error
"Cannot read
parset
:
$PARSET
"
[
-f
"
$PARSET
"
-a
-r
"
$PARSET
"
]
||
error
"
[parset]
Cannot read:
$PARSET
"
OBSID
=
`
getkey Observation.ObsID
`
OBSID
=
`
getkey Observation.ObsID
`
echo
"
Observation ID:
$OBSID
"
echo
"
[cobalt] ObsID =
$OBSID
"
# Remove stale feedback file (useful for testing)
# Remove stale feedback file (useful for testing)
FEEDBACK_FILE
=
$LOFARROOT
/var/run/Observation
${
OBSID
}
_feedback
FEEDBACK_FILE
=
$LOFARROOT
/var/run/Observation
${
OBSID
}
_feedback
...
@@ -226,8 +226,8 @@ then
...
@@ -226,8 +226,8 @@ then
DOT_COBALT_DEFAULT
=
$HOME
/.cobalt/default/
*
.parset
DOT_COBALT_DEFAULT
=
$HOME
/.cobalt/default/
*
.parset
DOT_COBALT_OVERRIDE
=
$HOME
/.cobalt/override/
*
.parset
DOT_COBALT_OVERRIDE
=
$HOME
/.cobalt/override/
*
.parset
if
[
"
$USER
"
==
"lofarsys"
]
;
then
if
[
"
$USER
"
==
"lofarsys"
]
;
then
ls
-U
--
$DOT_COBALT_DEFAULT
>
/dev/null 2>&1
&&
echo
"WARN: ignoring augmentation parset(s)
$DOT_COBALT_DEFAULT
"
>
&2
ls
-U
--
$DOT_COBALT_DEFAULT
>
/dev/null 2>&1
&&
echo
"
[parset]
WARN: ignoring augmentation parset(s)
$DOT_COBALT_DEFAULT
"
>
&2
ls
-U
--
$DOT_COBALT_OVERRIDE
>
/dev/null 2>&1
&&
echo
"WARN: ignoring augmentation parset(s)
$DOT_COBALT_OVERRIDE
"
>
&2
ls
-U
--
$DOT_COBALT_OVERRIDE
>
/dev/null 2>&1
&&
echo
"
[parset]
WARN: ignoring augmentation parset(s)
$DOT_COBALT_OVERRIDE
"
>
&2
unset
DOT_COBALT_DEFAULT DOT_COBALT_OVERRIDE
unset
DOT_COBALT_DEFAULT DOT_COBALT_OVERRIDE
fi
fi
...
@@ -239,7 +239,7 @@ then
...
@@ -239,7 +239,7 @@ then
$PARSET
\
$PARSET
\
$LOFARROOT
/etc/parset-additions.d/override/
*
.parset
\
$LOFARROOT
/etc/parset-additions.d/override/
*
.parset
\
$DOT_COBALT_OVERRIDE
\
$DOT_COBALT_OVERRIDE
\
>
$AUGMENTED_PARSET
||
error
"Could not create
parset
$AUGMENTED_PARSET
"
>
$AUGMENTED_PARSET
||
error
"
[parset]
Could not create
$AUGMENTED_PARSET
"
eval
$nullglob_state
eval
$nullglob_state
# Use the new one from now on
# Use the new one from now on
...
@@ -288,7 +288,7 @@ if [ -z "$HOSTS" ]; then
...
@@ -288,7 +288,7 @@ if [ -z "$HOSTS" ]; then
HOSTS
=
localhost
HOSTS
=
localhost
fi
fi
echo
"Hosts
:
$HOSTS
"
echo
"
[cobalt]
Hosts
=
$HOSTS
"
# Copy parset to all hosts
# Copy parset to all hosts
cksumline
=
`
md5sum
$PARSET
`
cksumline
=
`
md5sum
$PARSET
`
...
@@ -306,7 +306,7 @@ do
...
@@ -306,7 +306,7 @@ do
# Copy parset to remote node
# Copy parset to remote node
echo
"Copying parset to
$h
:
$PARSET
"
echo
"Copying parset to
$h
:
$PARSET
"
timeout
$KILLOPT
30s scp
-Bq
$PARSET
$h
:
$PARSET
||
error
"Could not
copy
parset to
$h
"
timeout
$KILLOPT
30s scp
-Bq
$PARSET
$h
:
$PARSET
||
error
"
[parset]
Could not
scp
parset to
$h
"
done
done
# ************************************
# ************************************
...
@@ -347,7 +347,7 @@ if $GLOBALFS; then
...
@@ -347,7 +347,7 @@ if $GLOBALFS; then
NODE_LIST
=
$(
getOutputProcHosts
$PARSET
)
NODE_LIST
=
$(
getOutputProcHosts
$PARSET
)
fi
fi
echo
"
Node li
st:
$NODE_LIST
"
echo
"
[outputProc] Ho
st
s
:
$NODE_LIST
"
# If parameters are found in the parset create a key_string for ssh command
# If parameters are found in the parset create a key_string for ssh command
if
[
"
$SSH_PRIVATE_KEY
"
!=
""
]
if
[
"
$SSH_PRIVATE_KEY
"
!=
""
]
...
@@ -360,7 +360,7 @@ then
...
@@ -360,7 +360,7 @@ then
fi
fi
# test the connection with local host: minimal test for valid credentials
# test the connection with local host: minimal test for valid credentials
ssh
-l
$SSH_USER_NAME
$KEY_STRING
"localhost"
"/bin/true"
||
error
"Failed to
create a connection
to localhost
, ssh error
"
ssh
-l
$SSH_USER_NAME
$KEY_STRING
"localhost"
"/bin/true"
||
error
"
[cobalt]
Failed to
ssh
to localhost"
# Create a helper function for delete child processes and
# Create a helper function for delete child processes and
# a file containing the PID of these processes
# a file containing the PID of these processes
...
@@ -376,20 +376,20 @@ function clean_up {
...
@@ -376,20 +376,20 @@ function clean_up {
EXIT_STATE
=
$1
EXIT_STATE
=
$1
PID_LIST
=
$2
PID_LIST
=
$2
echo
"
Cleaning up child processes.
Sending SIGTERM"
echo
"
[children]
Sending SIGTERM"
# THe kill statements might be called with an empty argument. This will
# THe kill statements might be called with an empty argument. This will
# result in an exit state 1. But the error is redirected to dev/null.
# result in an exit state 1. But the error is redirected to dev/null.
kill
$(
cat
$PID_LIST_FILE
)
2> /dev/null
kill
$(
cat
$PID_LIST_FILE
)
2> /dev/null
kill
$PID_LIST
2> /dev/null
kill
$PID_LIST
2> /dev/null
echo
"Waiting 2 seconds for soft shutdown"
echo
"
[children]
Waiting 2 seconds for soft shutdown"
sleep
2
sleep
2
echo
"Sending SIGKILL"
echo
"
[children]
Sending SIGKILL"
kill
-9
$(
cat
$PID_LIST_FILE
)
2> /dev/null
kill
-9
$(
cat
$PID_LIST_FILE
)
2> /dev/null
kill
-9
$PID_LIST
2> /dev/null
kill
-9
$PID_LIST
2> /dev/null
echo
"
removing Childprocess pid list
file"
echo
"
[children] Removing pid
file"
rm
-f
$PID_LIST_FILE
rm
-f
$PID_LIST_FILE
exit
$EXIT_STATE
exit
$EXIT_STATE
...
@@ -402,7 +402,7 @@ trap 'clean_up 1' SIGTERM SIGINT SIGQUIT SIGHUP
...
@@ -402,7 +402,7 @@ trap 'clean_up 1' SIGTERM SIGINT SIGQUIT SIGHUP
# Start output procs in a seperate function
# Start output procs in a seperate function
# Save file for started child processes
# Save file for started child processes
# Use helper program to get the list of hosts from parset
# Use helper program to get the list of hosts from parset
echo
"outputProc p
rocesses are appended to the
file
:
$PID_LIST_FILE
"
echo
"
[
outputProc
]
p
id
file
=
$PID_LIST_FILE
"
touch
$PID_LIST_FILE
touch
$PID_LIST_FILE
# Construct full command line for outputProc
# Construct full command line for outputProc
...
@@ -424,7 +424,7 @@ if ! $DUMMY_RUN; then
...
@@ -424,7 +424,7 @@ if ! $DUMMY_RUN; then
if
$SLURM
;
then
if
$SLURM
;
then
# The nodes we need (and can use) are part of this job
# The nodes we need (and can use) are part of this job
COMMAND
=
"srun -N
$SLURM_JOB_NUM_NODES
$OUTPUTPROC_CMDLINE
"
COMMAND
=
"srun -N
$SLURM_JOB_NUM_NODES
$OUTPUTPROC_CMDLINE
"
echo
"Starting
$COMMAND
"
echo
"
[outputProc]
Starting
$COMMAND
"
$COMMAND
&
$COMMAND
&
PID
=
$!
PID
=
$!
...
@@ -434,7 +434,7 @@ if ! $DUMMY_RUN; then
...
@@ -434,7 +434,7 @@ if ! $DUMMY_RUN; then
for
HOST
in
$NODE_LIST
for
HOST
in
$NODE_LIST
do
do
COMMAND
=
"ssh -tt -l
$SSH_USER_NAME
$KEY_STRING
$SSH_USER_NAME
@
$HOST
$OUTPUTPROC_CMDLINE
"
COMMAND
=
"ssh -tt -l
$SSH_USER_NAME
$KEY_STRING
$SSH_USER_NAME
@
$HOST
$OUTPUTPROC_CMDLINE
"
echo
"Starting
$COMMAND
"
echo
"
[outputProc]
Starting
$COMMAND
"
command_retry
"
$COMMAND
"
&
# Start retrying function in the background
command_retry
"
$COMMAND
"
&
# Start retrying function in the background
PID
=
$!
# get the pid
PID
=
$!
# get the pid
...
@@ -448,7 +448,6 @@ fi
...
@@ -448,7 +448,6 @@ fi
# Start rtcp
# Start rtcp
# ***********************************
# ***********************************
echo
"[cobalt] LOFARROOT =
$LOFARROOT
"
echo
"[cobalt] parset =
$PARSET
"
echo
"[cobalt] parset =
$PARSET
"
# Run in the background to allow signals to propagate
# Run in the background to allow signals to propagate
...
@@ -470,7 +469,7 @@ fi
...
@@ -470,7 +469,7 @@ fi
PID
=
$!
PID
=
$!
# Propagate SIGTERM
# Propagate SIGTERM
trap
"echo runObservation.sh: Received signal
cleaning up child processes
; clean_up 1
$PID
"
SIGTERM SIGINT SIGQUIT SIGHUP
trap
"echo
'[cobalt]
runObservation.sh: Received signal
.'
; clean_up 1
$PID
"
SIGTERM SIGINT SIGQUIT SIGHUP
# Wait for $COMMAND to finish. We use 'wait' because it will exit immediately if it
# Wait for $COMMAND to finish. We use 'wait' because it will exit immediately if it
# receives a signal.
# receives a signal.
...
@@ -481,7 +480,7 @@ trap "echo runObservation.sh: Received signal cleaning up child processes; clean
...
@@ -481,7 +480,7 @@ trap "echo runObservation.sh: Received signal cleaning up child processes; clean
wait
$PID
wait
$PID
OBSRESULT
=
$?
OBSRESULT
=
$?
echo
"
Resul
t code of observation:
$OBSRESULT
"
echo
"
[cobalt] Exi
t code of observation:
$OBSRESULT
"
# Return codes of rtcp:
# Return codes of rtcp:
# 0 = success
# 0 = success
...
@@ -495,7 +494,7 @@ then
...
@@ -495,7 +494,7 @@ then
#
#
# Note that we might miss failures detected by rtcp, such as
# Note that we might miss failures detected by rtcp, such as
# missing final meta data!
# missing final meta data!
echo
"Found feed-back file
$FEEDBACK_FILE
, considering the observation succesful."
echo
"
[cobalt]
Found feed-back file
$FEEDBACK_FILE
, considering the observation succesful."
OBSRESULT
=
0
OBSRESULT
=
0
fi
fi
...
@@ -507,16 +506,16 @@ fi
...
@@ -507,16 +506,16 @@ fi
sendback_state
"
$OBSRESULT
"
sendback_state
"
$OBSRESULT
"
# clean up outputProc children
# clean up outputProc children
echo
"
Allowing
120 second for normal end
of outputProc
"
echo
"
[outputProc] Waiting up to
120 second
s
for normal end"
# Set trap to kill the sleep in case of signals save the pid of sleep
# Set trap to kill the sleep in case of signals save the pid of sleep
(
trap
'kill $SLEEP_PID'
SIGTERM SIGINT SIGQUIT SIGHUP
;
sleep
120
&
SLEEP_PID
=
$!
;
echo
'
Starting forced cleanup outputProc:'
;
clean_up 0
)
&
(
trap
'kill $SLEEP_PID'
SIGTERM SIGINT SIGQUIT SIGHUP
;
sleep
120
&
SLEEP_PID
=
$!
;
echo
'
[outputProc] Killing'
;
wait
$SLEEP_PID
;
clean_up 0
)
&
KILLER_PID
=
$!
KILLER_PID
=
$!
# Waiting for the child processes to finish
# Waiting for the child processes to finish
LIST_OF_PIDS_TO_WAIT_FOR
=
$(
cat
$PID_LIST_FILE
)
LIST_OF_PIDS_TO_WAIT_FOR
=
$(
cat
$PID_LIST_FILE
)
if
[
"
$LIST_OF_PIDS_TO_WAIT_FOR
"
!=
""
]
# if there are outputProc pid working
if
[
"
$LIST_OF_PIDS_TO_WAIT_FOR
"
!=
""
]
# if there are outputProc pid working
then
then
echo
"
waiting for
output
p
roc
s
"
echo
"
[
output
P
roc
] Waiting...
"
wait
$(
cat
$PID_LIST_FILE
)
2> /dev/null
wait
$(
cat
$PID_LIST_FILE
)
2> /dev/null
fi
fi
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment