Skip to content
Snippets Groups Projects
Commit 509fdeb0 authored by Jan David Mol's avatar Jan David Mol
Browse files

L2SS-1323: Ignore status code 255 when determining to restart a device, so we...

L2SS-1323: Ignore status code 255 when determining to restart a device, so we only restart on recoverable failures.
parent a0f13b97
No related branches found
No related tags found
1 merge request!596Resolve L2SS-1323 "Dont uselessly restart tango containers"
Showing
with 69 additions and 19 deletions
#!/bin/bash
# Copyright (C) 2022 ASTRON (Netherlands Institute for Radio Astronomy)
# SPDX-License-Identifier: Apache-2.0
#
# This script wraps Tango Controls device servers such that they return
# exit code 0 when they should not be restarted. This allows them to
# be run effectively under the "restart: on-failure" strategy of running
# Docker containers.
#
# Exit code 0 will be returned under the following conditions:
# - Process completed successfully (Tango returns exit code 0).
# - Process failed but an immediate restart would be useless
# (Tango returns exit code 255, i.e. "exit(-1)" in its code base).
#
# Failures for which an immediate restart of the process would be
# useless include:
# - Device server is not found in the Tango Database
# - Mandatory properties of devices are missing in the Tango Database
# - Tango Database cannot be reached
#
# In all other cases, the non-zero exit code is propagated to Docker,
# resulting in a restart of the container. This includes for example
# Segmentation Faults.
#
# If the process is not wrapped with this script, the above conditions
# would result in a flurry of non-stop restarts of the container,
# thus stressing the system needlessly and spamming the logs.
set -e

#######################################
# Run a command in the background, forward INT and TERM to it, wait until
# it has really finished, and translate its exit status.
# Arguments: the command to run, followed by its arguments
# Returns:   0 if the command exited with status 0 or 255,
#            the command's exit status otherwise
#######################################
run_with_fixed_exit_status() {
  local pid result=0

  # run command in background, so this shell stays around to forward
  # signals and to inspect the exit status
  "$@" &
  pid=$!

  # propagate signals sent to us to the command. The PID is expanded right
  # here on purpose, so the handlers do not depend on variable scope later.
  # A failing kill (command already gone) must not trip "set -e".
  # NOTE(review): bash starts background commands with SIGINT ignored when
  # job control is off, so the forwarded INT only has an effect if the
  # command installs its own SIGINT handler -- confirm for the wrapped server.
  # shellcheck disable=SC2064
  trap "kill -INT ${pid} 2>/dev/null || true" INT
  # shellcheck disable=SC2064
  trap "kill -TERM ${pid} 2>/dev/null || true" TERM

  # wait for the command to finish. The status is captured with "||" since
  # a bare failing "wait" would abort the script under "set -e" before the
  # status could be inspected.
  wait "$pid" || result=$?

  # A trapped signal makes "wait" return early (status > 128) while the
  # command may still be shutting down. Keep waiting for as long as the
  # command exists, so we report its real exit status.
  while kill -0 "$pid" 2>/dev/null; do
    result=0
    wait "$pid" || result=$?
  done

  trap - INT TERM

  if (( result == 255 )); then
    # ignore exit status 255, which indicates a fundamental
    # error that is useless to retry execution for.
    result=0
  fi

  return "$result"
}

# Without a command there is nothing to wrap: fall through and finish
# with exit status 0.
if (( $# > 0 )); then
  RESULT=0
  run_with_fixed_exit_status "$@" || RESULT=$?
  # our exit code is the command's (with 255 mapped to 0)
  exit "$RESULT"
fi
...@@ -50,4 +50,4 @@ cd "$CWD" || exit 1 ...@@ -50,4 +50,4 @@ cd "$CWD" || exit 1
# Replace this script's process with the actual command, allowing any signals # Replace this script's process with the actual command, allowing any signals
# sent to the bash PID to be sent to the command directly. # sent to the bash PID to be sent to the command directly.
exec /usr/local/bin/wait-for-it.sh "$TANGO_HOST" --timeout=30 --strict -- "$@" exec /opt/lofar/tango/bin/fix-tango-exit-status.sh /usr/local/bin/wait-for-it.sh "$TANGO_HOST" --timeout=30 --strict -- "$@"
...@@ -51,6 +51,6 @@ services: ...@@ -51,6 +51,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-antennafield AntennaField STAT -v -ORBendPoint giop:tcp:0:5715 -ORBendPointPublish giop:tcp:${HOSTNAME}:5715 - l2ss-antennafield AntennaField STAT -v -ORBendPoint giop:tcp:0:5715 -ORBendPointPublish giop:tcp:${HOSTNAME}:5715
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
...@@ -50,6 +50,6 @@ services: ...@@ -50,6 +50,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-beamlet Beamlet STAT -v -ORBendPoint giop:tcp:0:5712 -ORBendPointPublish giop:tcp:${HOSTNAME}:5712 - l2ss-beamlet Beamlet STAT -v -ORBendPoint giop:tcp:0:5712 -ORBendPointPublish giop:tcp:${HOSTNAME}:5712
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
...@@ -49,6 +49,6 @@ services: ...@@ -49,6 +49,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-boot Boot STAT -v -ORBendPoint giop:tcp:0:5708 -ORBendPointPublish giop:tcp:${HOSTNAME}:5708 - l2ss-boot Boot STAT -v -ORBendPoint giop:tcp:0:5708 -ORBendPointPublish giop:tcp:${HOSTNAME}:5708
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
...@@ -55,6 +55,6 @@ services: ...@@ -55,6 +55,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-bst BST STAT -v -ORBendPoint giop:tcp:0:5717 -ORBendPointPublish giop:tcp:${HOSTNAME}:5717 - l2ss-bst BST STAT -v -ORBendPoint giop:tcp:0:5717 -ORBendPointPublish giop:tcp:${HOSTNAME}:5717
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
...@@ -52,7 +52,7 @@ services: ...@@ -52,7 +52,7 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-calibration Calibration STAT -v -ORBendPoint giop:tcp:0:5724 -ORBendPointPublish giop:tcp:${HOSTNAME}:5724 - l2ss-calibration Calibration STAT -v -ORBendPoint giop:tcp:0:5724 -ORBendPointPublish giop:tcp:${HOSTNAME}:5724
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
depends_on: depends_on:
......
...@@ -50,6 +50,6 @@ services: ...@@ -50,6 +50,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-digitalbeam DigitalBeam STAT -v -ORBendPoint giop:tcp:0:5713 -ORBendPointPublish giop:tcp:${HOSTNAME}:5713 - l2ss-digitalbeam DigitalBeam STAT -v -ORBendPoint giop:tcp:0:5713 -ORBendPointPublish giop:tcp:${HOSTNAME}:5713
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
...@@ -52,6 +52,6 @@ services: ...@@ -52,6 +52,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-docker Docker STAT -v -ORBendPoint giop:tcp:0:5705 -ORBendPointPublish giop:tcp:${HOSTNAME}:5705 - l2ss-docker Docker STAT -v -ORBendPoint giop:tcp:0:5705 -ORBendPointPublish giop:tcp:${HOSTNAME}:5705
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
...@@ -49,7 +49,7 @@ services: ...@@ -49,7 +49,7 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-observationcontrol ObservationControl STAT -v -ORBendPoint giop:tcp:0:5703 -ORBendPointPublish giop:tcp:${HOSTNAME}:5703 - l2ss-observationcontrol ObservationControl STAT -v -ORBendPoint giop:tcp:0:5703 -ORBendPointPublish giop:tcp:${HOSTNAME}:5703
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
depends_on: depends_on:
......
...@@ -48,6 +48,6 @@ services: ...@@ -48,6 +48,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-observation Observation STAT -v -ORBendPoint giop:tcp:0:5718 -ORBendPointPublish giop:tcp:${HOSTNAME}:5718 - l2ss-observation Observation STAT -v -ORBendPoint giop:tcp:0:5718 -ORBendPointPublish giop:tcp:${HOSTNAME}:5718
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
...@@ -45,6 +45,6 @@ services: ...@@ -45,6 +45,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-pcon pcon STAT -v -ORBendPoint giop:tcp:device-pcon:5720 -ORBendPointPublish giop:tcp:${HOSTNAME}:5720 - l2ss-pcon pcon STAT -v -ORBendPoint giop:tcp:device-pcon:5720 -ORBendPointPublish giop:tcp:${HOSTNAME}:5720
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
...@@ -45,6 +45,6 @@ services: ...@@ -45,6 +45,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-psoc PSOC STAT -v -ORBendPoint giop:tcp:device-psoc:5719 -ORBendPointPublish giop:tcp:${HOSTNAME}:5719 - l2ss-psoc PSOC STAT -v -ORBendPoint giop:tcp:device-psoc:5719 -ORBendPointPublish giop:tcp:${HOSTNAME}:5719
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
...@@ -50,6 +50,6 @@ services: ...@@ -50,6 +50,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-rcu2h RCU2H STAT -v -ORBendPoint giop:tcp:device-rcu2h:5725 -ORBendPointPublish giop:tcp:${HOSTNAME}:5725 - l2ss-rcu2h RCU2H STAT -v -ORBendPoint giop:tcp:device-rcu2h:5725 -ORBendPointPublish giop:tcp:${HOSTNAME}:5725
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
...@@ -50,6 +50,6 @@ services: ...@@ -50,6 +50,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-rcu2l RCU2L STAT -v -ORBendPoint giop:tcp:device-rcu2l:5726 -ORBendPointPublish giop:tcp:${HOSTNAME}:5726 - l2ss-rcu2l RCU2L STAT -v -ORBendPoint giop:tcp:device-rcu2l:5726 -ORBendPointPublish giop:tcp:${HOSTNAME}:5726
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
...@@ -50,6 +50,6 @@ services: ...@@ -50,6 +50,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-sdp SDP STAT -v -ORBendPoint giop:tcp:device-sdp:5701 -ORBendPointPublish giop:tcp:${HOSTNAME}:5701 - l2ss-sdp SDP STAT -v -ORBendPoint giop:tcp:device-sdp:5701 -ORBendPointPublish giop:tcp:${HOSTNAME}:5701
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
...@@ -55,6 +55,6 @@ services: ...@@ -55,6 +55,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-sst SST STAT -v -ORBendPoint giop:tcp:0:5702 -ORBendPointPublish giop:tcp:${HOSTNAME}:5702 - l2ss-sst SST STAT -v -ORBendPoint giop:tcp:0:5702 -ORBendPointPublish giop:tcp:${HOSTNAME}:5702
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
...@@ -45,6 +45,6 @@ services: ...@@ -45,6 +45,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-station-manager StationManager STAT -v -ORBendPoint giop:tcp:device-station-manager:5723 -ORBendPointPublish giop:tcp:${HOSTNAME}:5723 - l2ss-station-manager StationManager STAT -v -ORBendPoint giop:tcp:device-station-manager:5723 -ORBendPointPublish giop:tcp:${HOSTNAME}:5723
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
...@@ -45,6 +45,6 @@ services: ...@@ -45,6 +45,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-temperaturemanager TemperatureManager STAT -v -ORBendPoint giop:tcp:0:5716 -ORBendPointPublish giop:tcp:${HOSTNAME}:5716 - l2ss-temperaturemanager TemperatureManager STAT -v -ORBendPoint giop:tcp:0:5716 -ORBendPointPublish giop:tcp:${HOSTNAME}:5716
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
...@@ -46,6 +46,6 @@ services: ...@@ -46,6 +46,6 @@ services:
# configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA # configure CORBA to _listen_ on 0:port, but tell others we're _reachable_ through ${HOSTNAME}:port, since CORBA
# can't know about our Docker port forwarding # can't know about our Docker port forwarding
- l2ss-tilebeam TileBeam STAT -v -ORBendPoint giop:tcp:0:5711 -ORBendPointPublish giop:tcp:${HOSTNAME}:5711 - l2ss-tilebeam TileBeam STAT -v -ORBendPoint giop:tcp:0:5711 -ORBendPointPublish giop:tcp:${HOSTNAME}:5711
restart: unless-stopped restart: on-failure
stop_signal: SIGINT # request a graceful shutdown of Tango stop_signal: SIGINT # request a graceful shutdown of Tango
stop_grace_period: 2s stop_grace_period: 2s
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment