diff --git a/CEP/Pipeline/framework/lofarpipe/monitoring/poller.py b/CEP/Pipeline/framework/lofarpipe/monitoring/poller.py
index bef857df97372b79c0324c1b2e56f611c4e2682b..9243a008cf04283ad4df735357351cdcb46601f1 100644
--- a/CEP/Pipeline/framework/lofarpipe/monitoring/poller.py
+++ b/CEP/Pipeline/framework/lofarpipe/monitoring/poller.py
@@ -5,6 +5,7 @@ import os
 import tempfile
 
 from lofarpipe.support.xmllogging import add_child
+from lofar.common.subprocess_utils import communicate_returning_strings
 import xml.dom.minidom as xml
 
 
@@ -65,7 +66,7 @@ class UsageStats(threading.Thread):
                     pps = subprocess.Popen(["bash", temp_path, str(pid)],
                                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
-                    out, err = pps.communicate()
+                    out, err = communicate_returning_strings(pps)
 
                     parset_output = eval(out.rstrip()) # remove trailing white space
                     self.pid_stats[pid].append(parset_output)
diff --git a/CEP/Pipeline/framework/lofarpipe/support/group_data.py b/CEP/Pipeline/framework/lofarpipe/support/group_data.py
index 1cd51d2d98228a06d4c123d65f3c9e4523315f2e..3544e35d7162cdab8ed806fb14035b6786739cbf 100644
--- a/CEP/Pipeline/framework/lofarpipe/support/group_data.py
+++ b/CEP/Pipeline/framework/lofarpipe/support/group_data.py
@@ -14,6 +14,7 @@ import subprocess
 import lofarpipe.support.utilities as utilities
 from lofarpipe.support.clusterdesc import get_compute_nodes
 from lofarpipe.support.parset import Parset
+from lofar.common.subprocess_utils import communicate_returning_strings
 
 # Data map methods were moved to a separate module. 
 # Importing them for backward compatibility.
@@ -47,7 +48,7 @@ def group_files(logger, clusterdesc, node_directory, group_size, filenames):
         my_process = subprocess.Popen(
             exec_string, stdout=subprocess.PIPE, stderr=subprocess.PIPE
         )
-        sout = my_process.communicate()[0]
+        sout, _ = communicate_returning_strings(my_process)
         data[node] = sout.split('\x00')
         data[node] = utilities.group_iterable(
             [element for element in data[node] if element in filenames],
diff --git a/CEP/Pipeline/framework/lofarpipe/support/remotecommand.py b/CEP/Pipeline/framework/lofarpipe/support/remotecommand.py
index 83a31d8c6a83fd605f0d38733752a071236d681c..e4c6e5431e303c8ce319b4a5c603e07520f3b187 100644
--- a/CEP/Pipeline/framework/lofarpipe/support/remotecommand.py
+++ b/CEP/Pipeline/framework/lofarpipe/support/remotecommand.py
@@ -23,6 +23,7 @@ from lofarpipe.support.jobserver import job_server
 import lofarpipe.support.lofaringredient as ingredient
 from lofarpipe.support.xmllogging import add_child
 from subprocess import Popen, PIPE
+from lofar.common.subprocess_utils import communicate_returning_strings
 
 # By default, Linux allocates lots more memory than we need(?) for a new stack
 # frame. When multiplexing lots of threads, that will cause memory issues.
@@ -501,7 +502,7 @@ class RemoteCommandRecipeMixIn(object):
             nodeliststr = []
             hargs = ['srun','hostname']
             proc = Popen(hargs, False, stdout=PIPE, stderr=None)
-            tup = proc.communicate()
+            tup = communicate_returning_strings(proc)
             nodeliststr = tup[0].rstrip('\n').split('\n')
             # remove duplicates. order not important
             nodeliststr = list(set(nodeliststr))
diff --git a/CEP/Pipeline/framework/lofarpipe/support/subprocessgroup.py b/CEP/Pipeline/framework/lofarpipe/support/subprocessgroup.py
index 28a4582154f286ff729bbc0be0b7ba585db9eac4..d034319a45ca823970737c23b9ed5bb5bcd58d08 100644
--- a/CEP/Pipeline/framework/lofarpipe/support/subprocessgroup.py
+++ b/CEP/Pipeline/framework/lofarpipe/support/subprocessgroup.py
@@ -4,6 +4,7 @@ import sys
 import fcntl
 import time
 from lofarpipe.support.lofarexceptions import PipelineException
+from lofar.common.subprocess_utils import communicate_returning_strings
 
 # subprocess is broken in python <=2.6. It does not work for fds > 1024 for example.
 try:
@@ -81,7 +82,7 @@ class SubProcess(object):
             return False
 
         # Process is finished, read remaining data and exit code
-        (stdout, stderr) = self.process.communicate()
+        (stdout, stderr) = communicate_returning_strings(self.process)
         self.exit_status = self.process.returncode
 
         self._addoutput(self.STDOUT, stdout, flush=True)
diff --git a/CEP/Pipeline/framework/lofarpipe/support/usagestats.py b/CEP/Pipeline/framework/lofarpipe/support/usagestats.py
index 7c160d2ce7b772acfdad0fd4563025182475f406..d3b83646b0d932ffefe5538aa433b4e80cd8ca3c 100644
--- a/CEP/Pipeline/framework/lofarpipe/support/usagestats.py
+++ b/CEP/Pipeline/framework/lofarpipe/support/usagestats.py
@@ -39,6 +39,7 @@ import os
 import tempfile
 
 from lofarpipe.support.xmllogging import add_child
+from lofar.common.subprocess_utils import communicate_returning_strings
 import xml.dom.minidom as xml
 
 
@@ -149,7 +150,7 @@ class UsageStats(threading.Thread):
                 pps = subprocess.Popen(["bash", self.temp_path, str(pid)],
                             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
-                out, err = pps.communicate()
+                out, err = communicate_returning_strings(pps)
                 # Store pids that are not active anymore (process has closed)
                 if not pps.returncode == 0:
                     pid_out.append(pid)
diff --git a/CEP/Pipeline/recipes/examples/master/example.py b/CEP/Pipeline/recipes/examples/master/example.py
index a3e3a8b4fc1bc36ed989148b04d231923997e8f5..c3930e72397163dcd813cbc19aaf83e2b98d34c9 100644
--- a/CEP/Pipeline/recipes/examples/master/example.py
+++ b/CEP/Pipeline/recipes/examples/master/example.py
@@ -14,6 +14,7 @@ import subprocess
 from lofarpipe.support.baserecipe import BaseRecipe
 from lofarpipe.support.lofaringredient import ExecField, StringField
 from lofarpipe.support.pipelinelogging import log_process_output
+from lofar.common.subprocess_utils import communicate_returning_strings
 
 class example(BaseRecipe):
     inputs = {
@@ -41,7 +42,7 @@ class example(BaseRecipe):
             stdout = subprocess.PIPE,
             stderr = subprocess.PIPE
         )
-        sout, serr = my_process.communicate()
+        sout, serr = communicate_returning_strings(my_process)
         self.outputs['stdout'] = sout
         log_process_output(
             self.inputs['executable'],
diff --git a/CEP/Pipeline/recipes/sip/master/new_bbs.py b/CEP/Pipeline/recipes/sip/master/new_bbs.py
index 23f3e3a8be8d3ed0ce260de5654ca2091d3d2dac..d0162ab1f36c3af11a6153085dc534b13318a3e1 100644
--- a/CEP/Pipeline/recipes/sip/master/new_bbs.py
+++ b/CEP/Pipeline/recipes/sip/master/new_bbs.py
@@ -30,6 +30,7 @@ from lofarpipe.support.jobserver import job_server
 import lofarpipe.support.utilities as utilities
 import lofarpipe.support.lofaringredient as ingredient
 from lofarpipe.support.utilities import create_directory
+from lofar.common.subprocess_utils import communicate_returning_strings
 
 
 class new_bbs(BaseRecipe):
@@ -344,7 +345,7 @@ class new_bbs(BaseRecipe):
             return 1
         result = self._monitor_process(bbs_kernel_process,
                                        "BBS Kernel on %s" % host)
-        sout, serr = bbs_kernel_process.communicate()
+        sout, serr = communicate_returning_strings(bbs_kernel_process)
         serr = serr.replace("Connection to %s closed.\r\n" % host, "")
         log_process_output("SSH session (BBS kernel)", sout, serr, self.logger)
         return result
@@ -387,7 +388,7 @@ class new_bbs(BaseRecipe):
             returncode = self._monitor_process(
                 bbs_control_process, "BBS Control"
             )
-            sout, serr = bbs_control_process.communicate()
+            sout, serr = communicate_returning_strings(bbs_control_process)
         shutil.rmtree(working_dir)
         log_process_output(
             self.inputs['control_exec'], sout, serr, self.logger
diff --git a/CEP/Pipeline/recipes/sip/master/setupparmdb.py b/CEP/Pipeline/recipes/sip/master/setupparmdb.py
index b58baf46144a9b671b1d42ee0edc7ba11f44dba0..1736f0547e0ad8ba53788c57abd26619d0bae641 100644
--- a/CEP/Pipeline/recipes/sip/master/setupparmdb.py
+++ b/CEP/Pipeline/recipes/sip/master/setupparmdb.py
@@ -20,6 +20,7 @@ from lofarpipe.support.remotecommand import ComputeJob
 from lofarpipe.support.data_map import DataMap, validate_data_maps
 from lofarpipe.support.pipelinelogging import log_process_output
 import lofarpipe.support.lofaringredient as ingredient
+from lofar.common.subprocess_utils import communicate_returning_strings
 
 template = """
 create tablename="%s"
@@ -103,7 +104,7 @@ class setupparmdb(BaseRecipe, RemoteCommandRecipeMixIn):
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE
             )
-            sout, serr = parmdbm_process.communicate(template % pdbfile)
+            sout, serr = communicate_returning_strings(parmdbm_process, input=(template % pdbfile))
             log_process_output("parmdbm", sout, serr, self.logger)
         except OSError as err:
             self.logger.error("Failed to spawn parmdbm: %s" % str(err))
diff --git a/CEP/Pipeline/recipes/sip/master/vdsmaker.py b/CEP/Pipeline/recipes/sip/master/vdsmaker.py
index 89699ade8a28bc6008cb33e188da8374c4b42ad1..a38bac8d0d210f015c6d66c30f19016e0c5cd73a 100644
--- a/CEP/Pipeline/recipes/sip/master/vdsmaker.py
+++ b/CEP/Pipeline/recipes/sip/master/vdsmaker.py
@@ -17,6 +17,7 @@ from lofarpipe.support.remotecommand import RemoteCommandRecipeMixIn
 from lofarpipe.support.remotecommand import ComputeJob
 from lofarpipe.support.data_map import DataMap
 from lofarpipe.support.pipelinelogging import log_process_output
+from lofar.common.subprocess_utils import communicate_returning_strings
 
 class vdsmaker(BaseRecipe, RemoteCommandRecipeMixIn):
     """
@@ -128,7 +129,7 @@ class vdsmaker(BaseRecipe, RemoteCommandRecipeMixIn):
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE
             )
-            sout, serr = combineproc.communicate()
+            sout, serr = communicate_returning_strings(combineproc)
             log_process_output(executable, sout, serr, self.logger)
             if combineproc.returncode != 0:
                 raise subprocess.CalledProcessError(
diff --git a/CEP/Pipeline/recipes/sip/nodes/copier.py b/CEP/Pipeline/recipes/sip/nodes/copier.py
index 9b3c44a65c1bc88f7565ef67ae5f1f3c3cf3c3f4..cf6d16f518d1178f25403f55f4c28a9cfcfed866 100644
--- a/CEP/Pipeline/recipes/sip/nodes/copier.py
+++ b/CEP/Pipeline/recipes/sip/nodes/copier.py
@@ -15,6 +15,8 @@ from lofarpipe.support.pipelinelogging import log_time
 from lofarpipe.support.utilities import create_directory
 from lofarpipe.support.group_data import load_data_map
 from lofarpipe.support.lofarexceptions import PipelineException
+from lofar.common.subprocess_utils import communicate_returning_strings
+
 
 
 class copier(LOFARnodeTCP):
@@ -70,7 +72,7 @@ class copier(LOFARnodeTCP):
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
 
-        (stdoutdata, stderrdata) = copy_process.communicate()
+        (stdoutdata, stderrdata) = communicate_returning_strings(copy_process)
         exit_status = copy_process.returncode
         #if copy failed log the missing file
         if  exit_status != 0:
diff --git a/CEP/Pipeline/recipes/sip/nodes/imager_create_dbs.py b/CEP/Pipeline/recipes/sip/nodes/imager_create_dbs.py
index d41ad866e4fff990b8949ba45ce351852493bfc0..98b0192849d465bb3c91838c6e8dd74c5ea7ce52 100644
--- a/CEP/Pipeline/recipes/sip/nodes/imager_create_dbs.py
+++ b/CEP/Pipeline/recipes/sip/nodes/imager_create_dbs.py
@@ -17,6 +17,7 @@ from lofarpipe.support.pipelinelogging import log_process_output
 from lofarpipe.support.pipelinelogging import CatchLog4CPlus
 from lofarpipe.support.utilities import catch_segfaults
 from lofarpipe.support.utilities import create_directory
+from lofar.common.subprocess_utils import communicate_returning_strings
 
 import monetdb.sql as db
 import lofar.gsm.gsmutils as gsm
@@ -271,7 +272,7 @@ class imager_create_dbs(LOFARnodeTCP):
                 stderr=subprocess.PIPE
             )
             # Send formatted template on stdin
-            sout, serr = parmdbm_process.communicate(formatted_template)
+            sout, serr = communicate_returning_strings(parmdbm_process, input=formatted_template)
 
             # Log the output
             log_process_output("parmdbm", sout, serr, self.logger)
diff --git a/CEP/Pipeline/recipes/sip/nodes/imager_finalize.py b/CEP/Pipeline/recipes/sip/nodes/imager_finalize.py
index 103b4cd50a1fa3a507ee85d7d9926f8b46104612..6b5c0afce369d59d185c6cd9953209f39e24fd10 100644
--- a/CEP/Pipeline/recipes/sip/nodes/imager_finalize.py
+++ b/CEP/Pipeline/recipes/sip/nodes/imager_finalize.py
@@ -18,6 +18,7 @@ import pyrap.images as pim
 from lofarpipe.support.utilities import catch_segfaults
 from lofarpipe.support.data_map import DataMap
 from lofarpipe.support.pipelinelogging import CatchLog4CPlus
+from lofar.common.subprocess_utils import communicate_returning_strings
 
 import urllib.request, urllib.error, urllib.parse
 import lofarpipe.recipes.helpers.MultipartPostHandler as mph
@@ -140,7 +141,7 @@ class imager_finalize(LOFARnodeTCP):
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
 
-            (stdoutdata, stderrdata) = proc.communicate()
+            (stdoutdata, stderrdata) = communicate_returning_strings(proc)
 
             exit_status = proc.returncode
             self.logger.info(stdoutdata)
diff --git a/CEP/Pipeline/recipes/sip/nodes/imager_prepare.py b/CEP/Pipeline/recipes/sip/nodes/imager_prepare.py
index 073c077424b533dd66a0c3de49fb5d047c5e7b6b..8a817de1ef547e94867825018a6aa2048156ba0d 100644
--- a/CEP/Pipeline/recipes/sip/nodes/imager_prepare.py
+++ b/CEP/Pipeline/recipes/sip/nodes/imager_prepare.py
@@ -21,6 +21,7 @@ from lofarpipe.support.utilities import create_directory
 from lofarpipe.support.data_map import DataMap
 from lofarpipe.support.subprocessgroup import SubProcessGroup
 from lofarpipe.recipes.helpers.data_quality import run_rficonsole, filter_bad_stations
+from lofar.common.subprocess_utils import communicate_returning_strings
 
 # Some constant settings for the recipe
 _time_slice_dir_name = "time_slices"
@@ -198,7 +199,7 @@ class imager_prepare(LOFARnodeTCP):
 
                 # Wait for finish of copy inside the loop: enforce single tread
                 # copy
-                (stdoutdata, stderrdata) = copy_process.communicate()
+                (stdoutdata, stderrdata) = communicate_returning_strings(copy_process)
 
                 exit_status = copy_process.returncode
 
diff --git a/CEP/Pipeline/recipes/sip/nodes/long_baseline.py b/CEP/Pipeline/recipes/sip/nodes/long_baseline.py
index c696b3f165aa4aeb6ffda3d7785b28f8bdee8294..e0cee17767431f1d6bdefdbb0ed82669fc9cb148 100644
--- a/CEP/Pipeline/recipes/sip/nodes/long_baseline.py
+++ b/CEP/Pipeline/recipes/sip/nodes/long_baseline.py
@@ -19,6 +19,7 @@ from lofarpipe.support.lofarnode import  LOFARnodeTCP
 from lofarpipe.support.utilities import create_directory
 from lofarpipe.support.data_map import DataMap
 from lofarpipe.support.subprocessgroup import SubProcessGroup
+from lofar.common.subprocess_utils import communicate_returning_strings
 
 
 # Some constant settings for the recipe
@@ -216,7 +217,7 @@ class long_baseline(LOFARnodeTCP):
 
               # Wait for finish of copy inside the loop: enforce single tread
               # copy
-              (stdoutdata, stderrdata) = copy_process.communicate()
+              (stdoutdata, stderrdata) = communicate_returning_strings(copy_process)
 
               exit_status = copy_process.returncode
 
diff --git a/CEP/Pipeline/recipes/sip/nodes/selfcal_finalize.py b/CEP/Pipeline/recipes/sip/nodes/selfcal_finalize.py
index bdc6db255fa989fc74fabbd185ad1a8facb6cfd2..243922a114b858724b03f73f5767cfe4bb45aa02 100644
--- a/CEP/Pipeline/recipes/sip/nodes/selfcal_finalize.py
+++ b/CEP/Pipeline/recipes/sip/nodes/selfcal_finalize.py
@@ -19,6 +19,7 @@ from lofarpipe.support.utilities import catch_segfaults
 from lofarpipe.support.data_map import DataMap
 from lofarpipe.support.pipelinelogging import CatchLog4CPlus
 from lofarpipe.support.subprocessgroup import SubProcessGroup
+from lofar.common.subprocess_utils import communicate_returning_strings
 
 import urllib.request, urllib.error, urllib.parse
 import lofarpipe.recipes.helpers.MultipartPostHandler as mph
@@ -140,7 +141,7 @@ class selfcal_finalize(LOFARnodeTCP):
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
 
-            (stdoutdata, stderrdata) = proc.communicate()
+            (stdoutdata, stderrdata) = communicate_returning_strings(proc)
 
             exit_status = proc.returncode
 
diff --git a/CMake/FindPythonModule.cmake b/CMake/FindPythonModule.cmake
index 190b85f210d94cdfe71a71188a13f5af5336ff0a..3bc09d08d07fcdbdbd32ee267177ed778e8ef067 100644
--- a/CMake/FindPythonModule.cmake
+++ b/CMake/FindPythonModule.cmake
@@ -26,7 +26,7 @@
 # $Id$
 
 # Search for the Python interpreter.
-find_package(PythonInterp)
+find_package(PythonInterp 3)
 
 # -----------------------------------------------------------------------------
 # find_python_module(module [REQUIRED])
diff --git a/LCS/MessageBus/src/messagebus.py b/LCS/MessageBus/src/messagebus.py
index b231fd49cc157348ba1925233d9a714247e79492..6699a60acaf8187d9c2387bc82b6dbc5dc07d437 100644
--- a/LCS/MessageBus/src/messagebus.py
+++ b/LCS/MessageBus/src/messagebus.py
@@ -49,6 +49,17 @@ else:
 
 logger=logging.getLogger("MessageBus")
 
+# which brokers to use to avoid routing
+if isProductionEnvironment():
+  broker_feedback="mcu001.control.lofar"
+  broker_state="ccu001.control.lofar"
+elif isTestEnvironment():
+  broker_feedback="mcu199.control.lofar"
+  broker_state="ccu199.control.lofar"
+else:
+  broker_feedback="localhost"
+  broker_state="localhost"
+
 #TODO: replace this version of the messagebus by the version in LCS/Messaging/python/messaging
 logger.warning("This version of the messagebus (lofar.messagebus.messagebus) is deprecated and will be replaced by lofar.messaging.messagebus")
 
diff --git a/LCS/MessageDaemons/src/CMakeLists.txt b/LCS/MessageDaemons/src/CMakeLists.txt
index e41e6b4d5228bed7adf4b5d79a2c63971fb97dc8..fca80d1eada57ccb35b39575eda7ccd06bdd1c21 100644
--- a/LCS/MessageDaemons/src/CMakeLists.txt
+++ b/LCS/MessageDaemons/src/CMakeLists.txt
@@ -1,12 +1,5 @@
 # $Id$
 
-include(LofarPackageVersion)
-
-set(messagedaemons_LIB_SRCS
-  Package__Version.cc)
-
-lofar_add_library(messagedaemons ${messagedaemons_LIB_SRCS})
-
 lofar_add_bin_scripts(MessageRouter)
 
 set(MessageRouterConfs
diff --git a/LCS/MessageDaemons/webmonitor/QPIDWebserverJSON b/LCS/MessageDaemons/webmonitor/QPIDWebserverJSON
index e2386a953830614a3f05d0d78790a7bdf990700f..3a3a29fc8c0cd4468fe1a78b13a8cfe46d26e697 100644
--- a/LCS/MessageDaemons/webmonitor/QPIDWebserverJSON
+++ b/LCS/MessageDaemons/webmonitor/QPIDWebserverJSON
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python2
 import time
 import os
 #import json
diff --git a/LCS/MessageDaemons/webmonitor/start_QPIDWebMonitor b/LCS/MessageDaemons/webmonitor/start_QPIDWebMonitor
index abe29091127a5e0adf4b530d63670a3f45c8be0d..35e6537cffaa51cdff1e092a0a9970fb8550d380 100755
--- a/LCS/MessageDaemons/webmonitor/start_QPIDWebMonitor
+++ b/LCS/MessageDaemons/webmonitor/start_QPIDWebMonitor
@@ -1,8 +1,8 @@
 #!/bin/bash
 
-webserv=`ps -o pid -C QPIDWebserverJSON |wc | awk '{ print $1 }'`
+webserv=`ps ax --no-headers -o pid,cmd | grep QPIDWebserverJSON | grep -v grep | wc -l`
 
-if [ "$webserv" == "1" ]; then
+if [ "$webserv" == "0" ]; then
    echo -n QPID status webserver not running, starting up...
    sudo -u qpidd LOFARROOT=$LOFARROOT sh -c "ulimit -s 1024; /opt/lofar/bin/QPIDWebserverJSON 1>> /opt/lofar/var/log//QPIDwebserverJSON.log 2>&1 &"
 else
@@ -12,8 +12,8 @@ fi
 
 sleep 1
 # Check if running
-webserv=`ps -o pid -C QPIDWebserverJSON |wc | awk '{ print $1 }'`
-if [ "$webserv" == "1" ]; then 
+webserv=`ps ax --no-headers -o pid,cmd | grep QPIDWebserverJSON | grep -v grep | wc -l`
+if [ "$webserv" == "0" ]; then 
   echo "    FAILED"
 else 
   echo "    OK"
diff --git a/LCS/MessageDaemons/webmonitor/stop_QPIDWebMonitor b/LCS/MessageDaemons/webmonitor/stop_QPIDWebMonitor
index 807b0e55cf2bb7a21cafc2dad95e413e560f3819..99e31a1b0f90f8dee5f9f89b26ea144a3fc84928 100755
--- a/LCS/MessageDaemons/webmonitor/stop_QPIDWebMonitor
+++ b/LCS/MessageDaemons/webmonitor/stop_QPIDWebMonitor
@@ -3,18 +3,18 @@
 # Script to stop running instance of QPID Web Monitor
 #
 
-webserv=`ps -o pid -C QPIDWebserverJSON |wc | awk '{ print $1 }'`
+webserv=`ps ax --no-headers -o pid,cmd | grep QPIDWebserverJSON | grep -v grep | wc -l`
 
-if [ "$webserv" == "1" ]
+if [ "$webserv" == "0" ]
 then
    echo QPID Monitor not running.
 else
    echo -n QPID status webserver is running, stopping now...
-   wspid=`ps -o pid -C QPIDWebserverJSON | sed "s/PID//g"`
+   wspid=`ps ax --no-headers -o pid,cmd | grep QPIDWebserverJSON | grep -v grep | awk '{ print $1; }'`
    sudo kill -KILL $wspid
    sleep 1
-   webserv=`ps -o pid -C QPIDWebserverJSON |wc | awk '{ print $1 }'`
-   if [ "$webserv" != "1" ]; then 
+   webserv=`ps ax --no-headers -o pid,cmd | grep QPIDWebserverJSON | grep -v grep | wc -l`
+   if [ "$webserv" != "0" ]; then 
      echo "    FAILED"
    else
      echo "    OK"
diff --git a/LCS/Messaging/python/messaging/RPC.py b/LCS/Messaging/python/messaging/RPC.py
index 4f142cece330bf20c2b2c8f36fabd7eeca33eeb9..1e15f394bda7e68e974ab6ec004444db8d41619b 100644
--- a/LCS/Messaging/python/messaging/RPC.py
+++ b/LCS/Messaging/python/messaging/RPC.py
@@ -20,7 +20,7 @@
 #
 
 #  RPC invocation with possible timeout
-from .messagebus import ToBus, TemporaryQueue
+from .messagebus import ToBus, TemporaryQueue, DEFAULT_BROKER
 from .messages import RequestMessage, ReplyMessage
 import uuid
 import logging
@@ -96,7 +96,7 @@ class RPC():
         self.Verbose           = kwargs.pop("Verbose", False)
         self.BusName           = kwargs.pop("busname", None)
         self.ServiceName       = service
-        self.broker            = broker if broker else 'localhost'
+        self.broker            = broker if broker else DEFAULT_BROKER
 
         if self.BusName is None:
             self.request_sender = ToBus(self.ServiceName, broker=self.broker)
@@ -150,7 +150,13 @@ class RPC():
         Content = _args_as_content(*args, **kwargs)
         HasArgs, HasKwArgs = _analyze_args(args, kwargs)
 
-        with TemporaryQueue(self.broker) as tmp_queue:
+        logger.debug("executing rpc call to address=%s subject=%s with timeout=%s",
+                     self.request_sender.address,
+                     self.request_sender.subject,
+                     timeout)
+
+        tmp_queue_postfix = self.request_sender.address
+        with TemporaryQueue(name=tmp_queue_postfix, broker=self.broker) as tmp_queue:
             with tmp_queue.create_frombus(connection_log_level=logging.DEBUG) as reply_receiver:
                 request_msg = RequestMessage(content=Content, reply_to=reply_receiver.address,
                                              has_args=HasArgs, has_kwargs=HasKwArgs)
@@ -160,11 +166,20 @@ class RPC():
                 self.request_sender.send(request_msg)
                 answer = reply_receiver.receive(timeout)
 
+                logger.debug("executed rpc call to address=%s subject=%s received answer on %s",
+                             self.request_sender.address,
+                             self.request_sender.subject,
+                             reply_receiver.address)
+
         status = {}
         # Check for Time-Out
         if answer is None:
             status["state"] = "TIMEOUT"
-            status["errmsg"] = "RPC Timed out"
+            status["errmsg"] = "RPC Timed out with call to service_bus: %s subject: %s, receiving on tmp_queue: %s request_msg: %s" % (
+                self.request_sender.address,
+                self.request_sender.subject,
+                reply_receiver.address,
+                request_msg)
             status["backtrace"] = ""
             raise RPCTimeoutException(status)
 
@@ -302,7 +317,7 @@ class RPCWrapper(object):
     def close(self):
         '''Close all opened rpc connections'''
         for rpc in list(self._serviceRPCs.values()):
-            logger.debug('closing rpc connection %s at %s', rpc.request_sender.address, rpc.broker)
+            logger.info('closing rpc connection %s at %s', rpc.request_sender.address, rpc.broker)
             rpc.close()
 
     def __enter__(self):
@@ -330,7 +345,7 @@ class RPCWrapper(object):
                 # not in cache
                 # so, create RPC for this service method, open it, and cache it
                 rpc = RPC(service_method, busname=self.busname, broker=self.broker, ForwardExceptions=True, **rpckwargs)
-                logger.debug('opening rpc connection %s at %s', rpc.request_sender.address, rpc.broker)
+                logger.info('opening rpc connection method=%s address=%s broker=%s', service_method, rpc.request_sender.address, rpc.broker)
                 rpc.open()
                 self._serviceRPCs[service_method] = rpc
 
@@ -356,4 +371,4 @@ class RPCWrapper(object):
             logger.error(str(e))
             raise
 
-__all__ = ["RPC", "RPCException", "RPCWrapper"]
+__all__ = ["RPC", "RPCException", "RPCTimeoutException", "RPCWrapper"]
diff --git a/LCS/Messaging/python/messaging/Service.py b/LCS/Messaging/python/messaging/Service.py
index 94f9d1657369296fb91f2c68cda63dcc3eefdeac..c2dc377bca9a9c2d846b53b59768e007d1086cca 100644
--- a/LCS/Messaging/python/messaging/Service.py
+++ b/LCS/Messaging/python/messaging/Service.py
@@ -227,6 +227,7 @@ class Service(AbstractBusListener):
                 else:
                     replymessage = serviceHandlerMethod()
 
+            #TODO: check for timeout and/or presence of response queue!
             self._send_reply(replymessage,"OK",lofar_msg.reply_to)
 
         except Exception as e:
diff --git a/LCS/Messaging/python/messaging/messagebus.py b/LCS/Messaging/python/messaging/messagebus.py
index 978759c13ba3ccbb46bf552d5b34523184531efb..6aa81bd049a4f96a9b1f1eb51487d29e2e161df5 100644
--- a/LCS/Messaging/python/messaging/messagebus.py
+++ b/LCS/Messaging/python/messaging/messagebus.py
@@ -30,6 +30,7 @@ from lofar.messaging.exceptions import MessageBusError, MessageFactoryError
 from lofar.messaging.messages import to_qpid_message, MESSAGE_FACTORY
 from lofar.common.util import raise_exception, is_iterable
 from lofar.common.datetimeutils import to_milliseconds_since_unix_epoch, from_milliseconds_since_unix_epoch
+from lofar.common import isProductionEnvironment, isTestEnvironment
 
 import proton
 import proton.utils
@@ -37,18 +38,23 @@ import proton.reactor
 import logging
 import uuid
 import threading
-from datetime import datetime
+from datetime import datetime, timedelta
 from copy import deepcopy
+from time import sleep
 
 logger = logging.getLogger(__name__)
 
 # Default settings for often used parameters.
-DEFAULT_BROKER = "localhost:5672"
-DEFAULT_BROKER_OPTIONS = {'reconnect': True}
-DEFAULT_RECEIVER_CAPACITY = 128
+if isProductionEnvironment():
+    DEFAULT_BROKER = "scu001.control.lofar"
+elif isTestEnvironment():
+    DEFAULT_BROKER = "scu199.control.lofar"
+else: # development environment
+    DEFAULT_BROKER = "localhost"
+
+DEFAULT_RECEIVER_CAPACITY = 1
 DEFAULT_TIMEOUT = 5
 
-
 class _ProtonSubjectFilter(proton.reactor.Filter):
     """
     helper class for filtering by subject
@@ -75,7 +81,7 @@ class _AbstractBus():
     but that of __new__().
     """
 
-    def __init__(self, address, broker=None, connection_log_level=logging.INFO):
+    def __init__(self, address, broker=None, connection_log_level=logging.INFO, auto_reconnect=True):
         """
         Initializer.
         :param address: valid Qpid address
@@ -85,6 +91,7 @@ class _AbstractBus():
         self.broker = broker if broker else DEFAULT_BROKER
         self._connected = False
         self._connection_log_level = connection_log_level
+        self._auto_reconnect = auto_reconnect
 
     @staticmethod
     def _split_address_and_subject(address):
@@ -104,12 +111,13 @@ class _AbstractBus():
         #deprecated. Use is_connected() instead. Kept for backwards compatibility.
         return self.is_connected()
 
-    def open(self):
+    def open(self, retry_on_connection_error=True):
         """
         The following actions will be performed when entering a context:
         * connect to the broker
         * add a receiver
         The connection to the broker will be closed if any of these failed.
+        :param retry_on_connection_error: If True then keep on trying to open the connection.
         :raise MessageBusError: if any of the above actions failed.
         :return: self
         """
@@ -132,7 +140,10 @@ class _AbstractBus():
                                                                            self.broker,
                                                                            ex)
             logger.exception(error_msg)
-            raise MessageBusError(error_msg)
+            if self._auto_reconnect and retry_on_connection_error and isinstance(ex, proton.ConnectionException):
+                self.reconnect(num_retries=None, retry_wait_time=10, timeout=None)
+            else:
+                raise MessageBusError(error_msg)
 
     def close(self):
         """
@@ -143,7 +154,10 @@ class _AbstractBus():
 
         try:
             self._disconnect_from_endpoint()
+
+            logger.debug("[%s] Disconnecting from broker: %s", self.__class__.__name__, self.broker)
             self.connection.close()
+            logger.debug("[%s] Disconnected from broker: %s", self.__class__.__name__, self.broker)
         except proton.ProtonException as ex:
             error_msg = "[%s] Connecting to %s at broker %s failed: %s" % (self.__class__.__name__,
                                                                            self.address,
@@ -157,6 +171,45 @@ class _AbstractBus():
             self.connection = None
             self._connected = False
 
+    def reconnect(self, num_retries=1, retry_wait_time=10, timeout=DEFAULT_TIMEOUT):
+        """
+        (Try to) reconnect to the messagebus.
+        :param num_retries: number of times to retry the reconnect. If None, retry indefinitely or until <timeout> seconds passed.
+        :param retry_wait_time: waiting time in seconds between retry attempts
+        :param timeout: timeout in seconds. Stop attempting to reconnect after this timeout, even when num_retries has not been reached yet.
+                        if timeout<0 or None, then never do timeout.
+        :return: bool indicating if (re)connect was successful
+        """
+
+        logger.info("trying to reconnect to %s", self.address)
+        retry_cnt = 0
+        reconnect_start_time = datetime.utcnow()
+        while num_retries is None or retry_cnt < num_retries:
+            try:
+                self.close()
+            except Exception as ex:
+                logger.exception(ex)
+
+            try:
+                # try to open the connection, but let this current reconnect loop handle the reconnects -> retry_on_connection_error=False
+                self.open(retry_on_connection_error=False)
+            except Exception as ex:
+                logger.exception(ex)
+
+            if self.is_connected():
+                return True
+
+            if timeout is not None and timeout >= 0:
+                if datetime.utcnow() - reconnect_start_time > timedelta(seconds=timeout):
+                    logger.error("could not reconnect to %s within %s seconds", self.address, timeout)
+                    return False
+
+            logger.info("waiting %s seconds before retrying to connect to %s", retry_wait_time, self.address)
+            sleep(retry_wait_time)
+            retry_cnt += 1
+
+        return self.is_connected()
+
     def __enter__(self):
         self.open()
         return self
@@ -189,14 +242,16 @@ class FromBus(_AbstractBus):
     but that of __new__().
     """
 
-    def __init__(self, address, broker=None, broker_options=None, connection_log_level=logging.INFO):
+    def __init__(self, address, broker=None, broker_options=None, connection_log_level=logging.INFO, auto_reconnect=True):
         """
         Initializer.
         :param address: valid Qpid address
         :param broker: valid Qpid broker URL, e.g. "localhost:5672"
         :param broker_options: OBSOLETE
         """
-        super(FromBus, self).__init__(address=address, broker=broker, connection_log_level=connection_log_level)
+        super(FromBus, self).__init__(address=address, broker=broker,
+                                      connection_log_level=connection_log_level,
+                                      auto_reconnect=auto_reconnect)
 
         if broker_options:
             logger.warning("broker_options are obsolete. address=%s broker=%s broker_options=%s",
@@ -234,19 +289,18 @@ class FromBus(_AbstractBus):
         """
         Receive the next message from any of the queues we're listening on.
         :param timeout: maximum time in seconds to wait for a message.
-        :param logDebugMessages: OBSOLETE
+        :param logDebugMessages: do/don't log superfluous debug messages (to reduce spam in logs)
         :return: received message, None if timeout occurred.
         """
-        if logDebugMessages:
-            # TODO: remove parameter logDebugMessages, and check usages.
-            logger.warning("[FromBus] ignoring obsolete parameter 'logDebugMessages' in method receive()")
 
-        logger.debug("[FromBus] Waiting %s seconds for next message", timeout)
+        if logDebugMessages:
+            logger.debug("[FromBus] Waiting %s seconds for next message", timeout)
         try:
             while True: # break when message is acceptable
                 msg = self._receiver.receive(timeout=timeout)
                 if msg is not None:
-                    logger.debug("[FromBus] Message received on: %s subject: %s" % (self.address, msg.subject))
+                    if logDebugMessages:
+                        logger.debug("[FromBus] Message received on: %s subject: %s" % (self.address, msg.subject))
                     break  # handle this message
 
         except proton.Timeout:
@@ -259,6 +313,7 @@ class FromBus(_AbstractBus):
         except Exception as e:
             #FIXME: what if another exception is raised? should we reconnect?
             logger.error(e)
+            self.reject()
             raise_exception(MessageBusError,
                             "[FromBus] unknown exception while receiving message on %s: %s" % (self.address, e))
 
@@ -268,21 +323,21 @@ class FromBus(_AbstractBus):
 
             # convert proton/qpid msg to lofarmessage
             lofar_msg = MESSAGE_FACTORY.create(msg)
+            logger.debug("[FromBus] received %s on bus %s" % (lofar_msg, self.address))
 
             # acknowledge the message on the broker
-            self.ack(msg)
+            self.ack()
 
             return lofar_msg
         except Exception as e:
-            self.reject(msg)
+            self.reject()
+            raise_exception(MessageBusError, "[FromBus] Message rejected. Error=%s" % (e,))
 
 
-    def ack(self, msg):
+    def ack(self):
         """
-         Acknowledge a message. This will inform Qpid that the message can
+        Acknowledge the last received message. This will inform Qpid that the message can
         safely be removed from the queue.
-        :param msg: message to be acknowledged
         """
         try:
             self._receiver.accept()    # with proton, we can only unspecifically accecpt the last received message
@@ -291,16 +346,21 @@ class FromBus(_AbstractBus):
             #logger.debug("[FromBus] Could not acknowledge message: %s error=%s", msg, e)
             pass
         else:
-            logger.debug("[FromBus] acknowledged message: %s", msg)
+            logger.debug("[FromBus] acknowledged last message")
 
-    def _reject(self, msg):
+    def reject(self):
         """
         Reject a message. This will inform Qpid that the message should not be
-        redelivered. You cannot reject a message that has already been acknowledged.
-        :param msg: message to be rejected
+        redelivered.
         """
-        logger.warning("[FromBus] _reject() is not supported, using ack() instead")
-        self.ack()
+        try:
+            self._receiver.reject()    # with proton, we can only unspecifically reject the last received message
+        except Exception as e:
+            # This seems to happen quite often...
+            logger.exception("[FromBus] Could not reject last message error=%s", e)
+            pass
+        else:
+            logger.debug("[FromBus] rejected last message")
 
     def drain(self, timeout=0.1):
         """Read and ack all messages until queue/exchange is empty"""
@@ -355,14 +415,16 @@ class ToBus(_AbstractBus):
     but that of __new__().
     """
 
-    def __init__(self, address, broker=None, broker_options=None, connection_log_level=logging.INFO):
+    def __init__(self, address, broker=None, broker_options=None, connection_log_level=logging.INFO, auto_reconnect=True):
         """
         Initializer.
         :param address: valid Qpid address
         :param broker: valid Qpid broker URL, e.g. "localhost:5672"
         :param broker_options: OBSOLETE
         """
-        super(ToBus, self).__init__(address=address, broker=broker, connection_log_level=connection_log_level)
+        super(ToBus, self).__init__(address=address, broker=broker,
+                                    connection_log_level=connection_log_level,
+                                    auto_reconnect=auto_reconnect)
 
         if broker_options:
             logger.warning("broker_options are obsolete. address=%s broker=%s broker_options=%s",
@@ -407,9 +469,23 @@ class ToBus(_AbstractBus):
 
             logger.debug("[ToBus] Sending message to: %s (%s)", self.address, qmsg)
 
-            self._sender.send(qmsg, timeout=timeout)
+            sending_start_time = datetime.utcnow()
+            while True:
+                try:
+                    # if datetime.utcnow() - sending_start_time > timedelta(seconds=timeout):
+                    #     raise TimeoutError("Could not send msg to %s within %s seconds", self.address, timeout)
+
+                    self._sender.send(qmsg, timeout=timeout)
+                    break # no exception, so sending succeeded, break out of loop.
+                except proton.ProtonException as ex:
+                    logger.error("error while sending message to %s, trying to reconnect... error=%s", self.address, ex)
+
+                    if not self.reconnect(num_retries=None, retry_wait_time=5, timeout=timeout):
+                        raise # reconnecting did not help, re-raise original exception....
+                    # yes, reconnect worked, try sending again in the while loop
+
         except Exception as e:
-            raise_exception(MessageBusError, "[ToBus] Failed to send message to: %s error=%s" % (self._sender.target, e))
+            raise_exception(MessageBusError, "[ToBus] Failed to send message to: %s error=%s" % (self.address, e))
         finally:
             # restore the original body (in case it was modified)
             if qmsg_body_original is not None:
@@ -434,7 +510,7 @@ class TemporaryQueue(object):
 
     Alternative use cases with only a tobus or only a frombus on the tmp_queue are also possible.
     """
-    def __init__(self, name=None, broker="localhost"):
+    def __init__(self, name=None, broker=DEFAULT_BROKER):
         """
         Create a TemporaryQueue instance with an optional name on the given broker.
         :param name: Optional name, which is part of the final address which also includes a uuid.
@@ -547,17 +623,18 @@ class AbstractBusListener(object):
         if self._listening == True:
             return
 
-        self._bus_listener  = FromBus(self.address, broker=self.broker, connection_log_level=logging.INFO)
-        self._bus_listener.open()
-
         if numthreads != None:
             self._numthreads = numthreads
 
         self._running.set()
         self._threads = {}
+        self._bus_listeners = {}
         for i in range(self._numthreads):
             thread = threading.Thread(target=self._loop)
             self._threads[thread] = self._create_thread_args(i)
+            _bus_listener = FromBus(self.address, broker=self.broker, connection_log_level=logging.INFO)
+            _bus_listener.open()
+            self._bus_listeners[thread] = _bus_listener
             thread.start()
         self._listening = True
 
@@ -576,15 +653,16 @@ class AbstractBusListener(object):
 
             for thread, args in list(self._threads.items()):
                 logger.debug("Thread %2d: STOPPING Listening for messages on %s at broker %s" %
-                             (args['index'], self.address, self.broker if self.broker else 'localhost'))
+                             (args['index'], self.address, self.broker if self.broker else DEFAULT_BROKER))
                 thread.join()
                 logger.info("Thread %2d: STOPPED Listening for messages on %s" % (args['index'], self.address))
                 logger.info("           %d messages received and %d processed OK." % (args['num_received_messages'], args['num_processed_messages']))
-        self._listening = False
 
-        # close the listeners
-        if self._bus_listener.isConnected():
-            self._bus_listener.close()
+                # close the listeners
+                if self._bus_listeners[thread].isConnected():
+                    self._bus_listeners[thread].close()
+
+        self._listening = False
 
 
     def __enter__(self):
@@ -629,7 +707,7 @@ class AbstractBusListener(object):
         args = self._threads[currentThread]
         thread_idx = args['index']
         logger.info( "Thread %d START Listening for messages on %s at broker %s" %
-                    (thread_idx, self.address, self.broker if self.broker else 'localhost'))
+                    (thread_idx, self.address, self.broker if self.broker else DEFAULT_BROKER))
         try:
             self._onListenLoopBegin()
         except Exception as e:
@@ -641,11 +719,11 @@ class AbstractBusListener(object):
                 self._onBeforeReceiveMessage()
             except Exception as e:
                 logger.error("onBeforeReceiveMessage() failed with %s", e)
-                continue
+                pass
 
             try:
                 # get the next message
-                lofar_msg = self._bus_listener.receive(1)
+                lofar_msg = self._bus_listeners[currentThread].receive(1)
                 # retry if timed-out
                 if lofar_msg is None:
                     continue
@@ -661,8 +739,6 @@ class AbstractBusListener(object):
 
                     self._debug("Finished handler")
 
-                    self._bus_listener.ack(lofar_msg)
-
                     args['num_processed_messages'] += 1
 
                     try:
diff --git a/LCS/Messaging/python/messaging/messages.py b/LCS/Messaging/python/messaging/messages.py
index cebd46f59928914044745ccaf266015f1121068e..7d996081d70d98473d77a42093a57dd6699792d8 100644
--- a/LCS/Messaging/python/messaging/messages.py
+++ b/LCS/Messaging/python/messaging/messages.py
@@ -234,18 +234,10 @@ class LofarMessage(object):
         print(str(self))
 
     def __str__(self):
-        result = ''
-        for (key, value) in \
-                self.__dict__['_qpid_msg'].__dict__['properties'].items():
-            result += "%s: %s\n" % (key, value)
-
-        result += "---\n"
-
-        for key in _QPID_MESSAGE_FIELDS:
-            if (key != 'properties' and key in self.__dict__['_qpid_msg'].__dict__.items()):
-                result += "%s:%s\n" % (key, self.__dict__['_qpid_msg'].__dict__[key])
-        result += "===\n"
-        return result
+        return "%s subject: %s id: %s%s" % (self.__class__.__name__,
+                                            self.subject,
+                                            self.MessageId,
+                                            (" reply_to: %s" % (self.reply_to,)) if self.reply_to else "")
 
 
 class EventMessage(LofarMessage):
diff --git a/LCS/Messaging/python/messaging/test/t_RPC.py b/LCS/Messaging/python/messaging/test/t_RPC.py
index a046d9efd42983135e05e2beba0124e1a850f617..6daaf3a9b0d61e0dd64e6f0c7f51590a3a10be3e 100644
--- a/LCS/Messaging/python/messaging/test/t_RPC.py
+++ b/LCS/Messaging/python/messaging/test/t_RPC.py
@@ -5,10 +5,11 @@ It defines 5 functions and first calls those functions directly to check
 that the functions are OK. Next the same tests are done with the RPC and
 Service classes in between. This should give the same results.
 """
-import sys
+from pprint import pformat
+from time import sleep
 from contextlib import ExitStack
 
-from lofar.messaging import Service, RPC, TemporaryQueue
+from lofar.messaging import Service, RPC, TemporaryQueue, RPCTimeoutException
 
 class UserException(Exception):
     "Always thrown in one of the functions"
@@ -33,6 +34,12 @@ def StringFunc(input_value):
         raise InvalidArgType("Input value must be of the type 'string'")
     return input_value.upper()
 
+def TimeoutFunc(input_value):
+    """
+    create a timeout by sleeping
+    """
+    sleep(2)
+
 def ListFunc(input_value):
     "Convert the list to uppercase."
     if not isinstance(input_value, list):
@@ -70,6 +77,8 @@ if __name__ == '__main__':
     # ErrorFunc
     import logging
     logging.basicConfig(format='%(asctime)s %(process)d %(levelname)s %(message)s', level=logging.INFO)
+    logger = logging.getLogger(__name__)
+
     try:
         result = ErrorFunc("aap noot mies")
     except UserException as e:
@@ -108,7 +117,7 @@ if __name__ == '__main__':
     if result != {'mies' : "MEISJE", "aap" : 125, "noot" : [2, 3]}:
         raise Exception("Dict function failed:{}".format(result))
 
-    print("Functions tested outside RPC: All OK")
+    logger.info("Functions tested outside RPC: All OK")
 
     with TemporaryQueue("t_RPC") as test_queue:
 
@@ -118,12 +127,12 @@ if __name__ == '__main__':
         serv3 = Service("StringService",    StringFunc,    busname=test_queue.address, numthreads=1)
         serv4 = Service("ListService",      ListFunc,      busname=test_queue.address, numthreads=1)
         serv5 = Service("DictService",      DictFunc,      busname=test_queue.address, numthreads=1)
-
+        serv6 = Service("TimeoutService",   TimeoutFunc,   busname=test_queue.address, numthreads=1)
 
 
         # 'with' sets up the connection context and defines the scope of the service.
         with ExitStack() as stack:
-            for arg in (serv1, serv2, serv3, serv4, serv5):
+            for arg in (serv1, serv2, serv3, serv4, serv5, serv6):
                 stack.enter_context(arg)
 
             # Start listening in the background. This will start as many threads as defined by the instance
@@ -132,6 +141,7 @@ if __name__ == '__main__':
             serv3.start_listening()
             serv4.start_listening()
             serv5.start_listening()
+            serv6.start_listening()
 
             # Redo all tests but via through RPC
             # ErrorFunc
@@ -178,7 +188,15 @@ if __name__ == '__main__':
                 if result[0] != {'mies' : "MEISJE", "aap" : 125, "noot" : [2, 3]}:
                     raise Exception("Dict function failed:{}".format(result))
 
-            print("Functions tested with RPC: All OK")
+            # TimeoutFunc
+            with RPC("TimeoutService", busname=test_queue.address, timeout=1) as rpc:
+                try:
+                    result = rpc("some random string")
+                    raise Exception("TimeoutService did not timeout as expected...")
+                except RPCTimeoutException as e:
+                    logger.info("TimeoutService timed out as expected. RPCTimeoutException: %s", e.args)
+
+            logger.info("Functions tested with RPC: All OK")
 
             # Tell all background listener threads to stop and wait for them to finish.
             serv1.stop_listening()
@@ -186,3 +204,4 @@ if __name__ == '__main__':
             serv3.stop_listening()
             serv4.stop_listening()
             serv5.stop_listening()
+            serv6.stop_listening()
diff --git a/LCS/Messaging/python/messaging/test/t_messagebus.py b/LCS/Messaging/python/messaging/test/t_messagebus.py
index 850929fa59aa5d5d0caa4df556184e300782f697..6404845041d6ab40d47f88df6729a3dd2f0884a2 100644
--- a/LCS/Messaging/python/messaging/test/t_messagebus.py
+++ b/LCS/Messaging/python/messaging/test/t_messagebus.py
@@ -35,6 +35,8 @@ from lofar.messaging.messagebus import DEFAULT_RECEIVER_CAPACITY
 from lofar.messaging.exceptions import MessageBusError, InvalidMessage
 from lofar.common.datetimeutils import round_to_millisecond_precision
 from lofar.common.util import convertIntKeysToString, convertStringDigitKeysToInt
+from time import sleep
+from threading import Lock
 
 logger = logging.getLogger(__name__)
 
@@ -142,7 +144,7 @@ class FromBusInitFailed(unittest.TestCase):
         Connecting to non-existent broker address must raise MessageBusError
         """
         with self.assertRaisesRegex(MessageBusError, "Name or service not known"):
-            with FromBus(self.test_queue.address, broker="foo.bar"):
+            with FromBus(self.test_queue.address, broker="foo.bar", auto_reconnect=False):
                 pass
 
     def test_connection_refused(self):
@@ -150,7 +152,7 @@ class FromBusInitFailed(unittest.TestCase):
         Connecting to broker on wrong port must raise MessageBusError
         """
         with self.assertRaisesRegex(MessageBusError, "Connection refused"):
-            with FromBus("fake" + self.test_queue.address, broker="localhost:4"):
+            with FromBus("fake" + self.test_queue.address, broker="localhost:4", auto_reconnect=False):
                 pass
 
 
@@ -194,7 +196,7 @@ class ToBusInitFailed(unittest.TestCase):
         Connecting to non-existent broker address must raise MessageBusError
         """
         with self.assertRaisesRegex(MessageBusError, "Name or service not known"):
-            with ToBus(self.test_queue.address, broker="foo.bar",  broker_options={'reconnect': False}):
+            with ToBus(self.test_queue.address, broker="foo.bar", auto_reconnect=False):
                 pass
 
     def test_connection_refused(self):
@@ -202,7 +204,7 @@ class ToBusInitFailed(unittest.TestCase):
         Connecting to broker on wrong port must raise MessageBusError
         """
         with self.assertRaisesRegex(MessageBusError, "Connection refused"):
-            with ToBus(self.test_queue.address, broker="localhost:4"):
+            with ToBus(self.test_queue.address, broker="localhost:4", auto_reconnect=False):
                 pass
 
 # ========  Combined FromBus/ToBus unit tests  ======== #
@@ -244,8 +246,8 @@ class QueueIntrospection(unittest.TestCase):
             self.assertEqual(0, self.frombus.nr_of_messages_in_queue())
 
     def test_counting_multiple_messages_in_queue(self):
-        # DEFAULT_RECEIVER_CAPACITY should be > 2 otherwise we cannot even store multiple messages in the local queue
-        self.assertGreaterEqual(DEFAULT_RECEIVER_CAPACITY, 2)
+        # DEFAULT_RECEIVER_CAPACITY should be >= 1 otherwise we cannot even store multiple messages in the local queue
+        self.assertGreaterEqual(DEFAULT_RECEIVER_CAPACITY, 1)
 
         with self.tobus, self.frombus:
             MAX_NR_OF_MESSAGES = min(10, DEFAULT_RECEIVER_CAPACITY)
@@ -279,8 +281,8 @@ class SendReceiveMessage(unittest.TestCase):
         """
         with self.tobus, self.frombus:
             self.tobus.send(send_msg)
-            recv_msg = self.frombus.receive(timeout=TIMEOUT)
-            self.frombus.ack(recv_msg)
+            recv_msg = self.frombus.receive(timeout=TIMEOUT, logDebugMessages=True)
+
         self.assertEqual(
             (send_msg.SystemName, send_msg.MessageId, send_msg.MessageType),
             (recv_msg.SystemName, recv_msg.MessageId, recv_msg.MessageType))
@@ -373,6 +375,127 @@ class SendReceiveMessage(unittest.TestCase):
         recv_msg = self._test_sendrecv(RequestMessage(convertIntKeysToString(content), reply_to=self.test_queue.address))
         self.assertEqual(content, convertStringDigitKeysToInt(recv_msg.body))
 
+    def test_sendrecv_reconnect_in_send(self):
+        """
+        Test send/receive of a RequestMessage even when sending the message raises a ProtonException.
+        """
+        from proton.utils import BlockingSender
+        from proton import ProtonException
+
+        # use fancy code injection into proton to enforce a ProtonException at the correct moment
+        original_send = BlockingSender.send
+        def mock_send(self, msg, timeout=False, error_states=None):
+            # restore original behaviour into proton...
+            BlockingSender.send = original_send
+            raise ProtonException("mocked ProtonException")
+
+        # inject into proton...
+        BlockingSender.send = mock_send
+
+        # normal sendrecv... even though the exception is raised... reconnecting and retrying should still deliver message!
+        self._test_sendrecv(RequestMessage("foobar", reply_to=self.test_queue.address))
+
+        # check if the injection worked, by checking if BlockingSender.send has been restored to the original
+        self.assertTrue(BlockingSender.send == original_send)
+
+
+class PingPongPlayer(AbstractBusListener):
+    """
+    Helper class with a simple purpose:
+        - listen on one queue,
+        - when receiving a message, send answer on second queue, flipping message contents between ping and pong.
+
+
+    """
+    def __init__(self, name, listen_queue_name, response_queue_name, num_threads):
+        self.name = name
+        self.num_turns = 0
+        self.response_queue_name = response_queue_name
+        self.lock = Lock() # a lock to keep track of self.num_turns in a multithreaded environment
+        super(PingPongPlayer, self).__init__(listen_queue_name, numthreads=num_threads)
+
+    def get_num_turns(self):
+        with self.lock:
+            return self.num_turns
+
+    def _handleMessage(self, msg):
+        """Implementation of AbstractBusListener._handleMessage
+        log received message, and send response.
+        """
+        logger.info("%s: received %s", self.name, msg.content)
+        self.send_response(msg.content)
+
+    def send_response(self, value):
+        """
+        Send a response message to the response_queue_name, flipping ping for pong and vice versa
+        """
+        with ToBus(self.response_queue_name) as response_bus:
+            response_msg = EventMessage(content="ping" if value == "pong" else "pong")
+
+            logger.info("%s: sending %s", self.name, response_msg.content)
+            response_bus.send(response_msg)
+
+            with self.lock:
+                self.num_turns += 1
+
+class PingPongTester(unittest.TestCase):
+    """Test an event driven message ping/pong game, where two 'players' respond to each other.
+    This test should work regardless of the number of threads each 'player'/AbstractBusListener uses"""
+
+    def test_single_thread_per_player(self):
+        self._play(1)
+
+    def test_two_threads_per_player(self):
+        self._play(2)
+
+    def test_ten_threads_per_player(self):
+        self._play(10)
+
+    def _play(self, num_threads_per_player):
+        """simulate a ping/pong event driven loop until each player played a given amount of turns, or timeout"""
+
+        # setup tmp forward and return queue
+        with TemporaryQueue("forward") as forward_tmp_queue, TemporaryQueue("return") as return_tmp_queue:
+
+            # create two players, on "both sides of the table" / forward and return queue swapped.
+            with PingPongPlayer("Player1", forward_tmp_queue.address, return_tmp_queue.address, num_threads_per_player) as player1:
+                with PingPongPlayer("Player2", return_tmp_queue.address, forward_tmp_queue.address, num_threads_per_player) as player2:
+
+                    # game parameters
+                    NUM_TURNS = 10
+                    GAME_TIMEOUT = 5
+                    start_timestamp = datetime.utcnow()
+
+                    # first serve, referee throws a ping ball on the table in the direction of player2
+                    with forward_tmp_queue.create_tobus() as first_pinger:
+                        first_msg = EventMessage(content="ping")
+                        logger.info("first message: sending %s", first_msg.content)
+                        first_pinger.send(first_msg)
+
+                    # play the game!
+                    # run the "event loop". Actually there are multiple loops: num_threads per player
+                    # this loop just tracks game progress.
+                    while True:
+                        player1_num_turns = player1.get_num_turns()
+                        player2_num_turns = player2.get_num_turns()
+                        time_remaining = GAME_TIMEOUT - (datetime.utcnow() - start_timestamp).total_seconds()
+
+                        logger.info("player1_num_turns=%d/%d player2_num_turns=%d/%d time_remaining=%ssec",
+                                    player1_num_turns, NUM_TURNS, player2_num_turns, NUM_TURNS, time_remaining)
+
+                        # assert on deadlocked game (should never happen!)
+                        self.assertGreater(time_remaining, 0)
+
+                        if player1_num_turns >= NUM_TURNS and player2_num_turns >= NUM_TURNS :
+                            break
+
+                        sleep(0.1)
+
+                    # assert on players who did not finish the game
+                    self.assertGreaterEqual(player1.get_num_turns(), NUM_TURNS)
+                    self.assertGreaterEqual(player2.get_num_turns(), NUM_TURNS)
+
+
 if __name__ == '__main__':
-    logging.basicConfig(format='%(asctime)s %(process)d %(levelname)s %(message)s', level=logging.INFO)
-    unittest.main(defaultTest='SendReceiveMessage')
+    logging.basicConfig(format='%(asctime)s %(thread)d %(threadName)s %(levelname)s %(message)s', level=logging.INFO)
+    unittest.main()
diff --git a/LCS/Messaging/python/messaging/test/t_service_message_handler.py b/LCS/Messaging/python/messaging/test/t_service_message_handler.py
index ec1e20c25cdea90edbd199bf05f8314c3ded1232..671118471e124b4f4fd59a713e07998f47c8a9fd 100644
--- a/LCS/Messaging/python/messaging/test/t_service_message_handler.py
+++ b/LCS/Messaging/python/messaging/test/t_service_message_handler.py
@@ -70,7 +70,7 @@ class FailingMessageHandling(MessageHandlerInterface):
         print("FailingMessageHandling prepare_receive: %s" % self.args)
         if self.counter:
             time.sleep(1)  # Prevent running around too fast
-            raise UserException("oops in prepare_receive(%d)" % self.counter)
+            raise UserException("FailingMessageHandling: intentional oops in prepare_receive counter=%d" % self.counter)
         else:
             self.counter = self.counter + 1
     def finalize_handling(self, successful):
diff --git a/LCS/PyCommon/lcu_utils.py b/LCS/PyCommon/lcu_utils.py
index ae39ec0ef21e29e381e585d61c47adc940b0daed..e4e3abe0c7373142f9c9cf1ce1c30a51b25a9c37 100644
--- a/LCS/PyCommon/lcu_utils.py
+++ b/LCS/PyCommon/lcu_utils.py
@@ -16,7 +16,7 @@
 # with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
 
 from lofar.common.ssh_utils import ssh_cmd_list
-from lofar.common.subprocess_utils import execute_in_parallel, wrap_composite_command
+from lofar.common.subprocess_utils import execute_in_parallel, wrap_composite_command, communicate_returning_strings
 from subprocess import Popen, PIPE
 
 import os
@@ -100,7 +100,7 @@ def get_current_stations(station_group='today', as_host_names=True):
     cmd = wrap_command_in_lcu_head_node_ssh_call(cmd)
     logger.debug('executing cmd: %s', ' '.join(cmd))
     proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
-    out, err = proc.communicate()
+    out, err = communicate_returning_strings(proc)
 
     if proc.returncode != 0:
         raise LCURuntimeError("Could not fetch stations.txt file. sdterr=%s" % (err, ))
@@ -161,7 +161,7 @@ def get_stations_rcu_mode(stations=None):
 
     result = {}
     for station, proc in list(procs.items()):
-        out, err = proc.communicate()
+        out, err = communicate_returning_strings(proc)
 
         if proc.returncode != 0:
             logger.warning("Could not determine rcu mode for station %s. sdterr=%s" % (station, err))
@@ -213,7 +213,7 @@ def get_station_cable_delays(stations=None):
         # wait for all fetching procs to finish...
         #TODO: add timeout?
         for station, proc in list(cable_delay_procs.items()):
-            out, err = proc.communicate()
+            out, err = communicate_returning_strings(proc)
             if proc.returncode != 0:
                 logger.warning("Could not fetch cable_delay file for station %s. stderr=%s", station, err)
 
@@ -331,7 +331,7 @@ def get_station_calibration_tables(stations=None, antenna_set_and_filter=None, t
         # wait for all fetching procs to finish...
         #TODO: add timeout?
         for station, proc in list(caltable_procs.items()):
-            out, err = proc.communicate()
+            out, err = communicate_returning_strings(proc)
             if proc.returncode != 0:
                 logger.warning("Could not fetch calibration table for station %s. stderr=%s", station, err)
 
diff --git a/LCU/PPSTune/ppstune/ppstune.py b/LCU/PPSTune/ppstune/ppstune.py
index bd7191743c6fb682703e9ee8b902a6a6dcafc86d..2d59bff178427832fc30d019774104b038ced31b 100755
--- a/LCU/PPSTune/ppstune/ppstune.py
+++ b/LCU/PPSTune/ppstune/ppstune.py
@@ -783,7 +783,7 @@ def restart_rsp_driver(lofar_log_dir,
             logging.info('Killing RSPDriver with PID %d', rsp_pid)
             output = subprocess.Popen([sudo_cmd, 'kill', '-15', str(rsp_pid)],
                                       stdin=subprocess.PIPE,
-                                      stdout=subprocess.PIPE).communicate(input='\n')[0]
+                                      stdout=subprocess.PIPE).communicate(input='\n')[0].decode("UTF-8")
             time.sleep(1.0)
             if len(output) != 0:
                 raise OSError('Failed to kill RSPDriver with PID %d:\n%s' %
diff --git a/LCU/StationTest/RSPmonitor.py b/LCU/StationTest/RSPmonitor.py
index b05ee0ef4d76edbd58ed7ebd70d59463be1cca6c..e7c6428bc60fe166ec59f8e7da2df8be6f86db31 100755
--- a/LCU/StationTest/RSPmonitor.py
+++ b/LCU/StationTest/RSPmonitor.py
@@ -53,7 +53,7 @@ def isRSPrunning(board):
 		if debug >= 2: print("busy")
 	
 	if timeout > 0:  
-		output = proc.communicate()[1]	# rsuctl3 sends back returncode via stderr, so use [1] here!
+		output = proc.communicate()[1].decode("UTF-8")	# rsuctl3 sends back returncode via stderr, so use [1] here!
 		flt=5
 		if debug >= 2:print("output:" + output)
 		#if debug >= 1:print "RSP is running"
diff --git a/LTA/LTAIngest/LTAIngestClient/bin/ingestremoveexportjob b/LTA/LTAIngest/LTAIngestClient/bin/ingestremoveexportjob
index 566aa4644dad7634dea2033bdf1435638b49e6d9..328051f79577a602207585da4991df4305ddd738 100755
--- a/LTA/LTAIngest/LTAIngestClient/bin/ingestremoveexportjob
+++ b/LTA/LTAIngest/LTAIngestClient/bin/ingestremoveexportjob
@@ -29,7 +29,7 @@ export_group_id is the mom_id of the export job''')
     with IngestRPC(busname=options.busname, servicename=options.servicename, broker=options.broker) as rpc:
         export_group_id = int(args[0])
         if rpc.removeExportJob(export_group_id):
-            print rpc.getReport(export_group_id)
+            print(rpc.getReport(export_group_id))
         else:
             logger.warn('Failed to remove export group id %s', export_group_id)
 
diff --git a/LTA/LTAIngest/LTAIngestClient/bin/ingestreport b/LTA/LTAIngest/LTAIngestClient/bin/ingestreport
index c74c885059a76cbc4511509a59cd0481e1859e2b..ef2119a796ea426166868b081059ce2197b80490 100755
--- a/LTA/LTAIngest/LTAIngestClient/bin/ingestreport
+++ b/LTA/LTAIngest/LTAIngestClient/bin/ingestreport
@@ -33,10 +33,10 @@ export_group_id is the mom_id of the export job''')
                 export_group_ids = [int(args[0])]
             else:
                 export_group_ids = rpc.getExportIds()
-                print export_group_ids
+                print(export_group_ids)
 
             for export_group_id in export_group_ids:
-                print rpc.getReport(export_group_id)
+                print(rpc.getReport(export_group_id))
 
 if __name__ == '__main__':
     main()
diff --git a/LTA/LTAIngest/LTAIngestCommon/srm.py b/LTA/LTAIngest/LTAIngestCommon/srm.py
index 9e580206740be50c567beaf5d87f604c41176ee8..b02eb185db3863da32ef6452300fd0abbe43fc6a 100755
--- a/LTA/LTAIngest/LTAIngestCommon/srm.py
+++ b/LTA/LTAIngest/LTAIngestCommon/srm.py
@@ -124,7 +124,7 @@ def __execute(cmd, log_prefix='', timeout=-1):
     stdout, stderr = communicate_returning_strings(p_cmd)
 
     if p_cmd.returncode != 0:
-        logger.error('%s: %s', log_prefix, stderr)
+        logger.error('%s: cmd=%s stdout=%s stderr=%s', log_prefix, ' '.join(cmd), stdout, stderr)
 
     return stdout, stderr, p_cmd.returncode
 
diff --git a/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/lib/ingestjobmanagementserver.py b/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/lib/ingestjobmanagementserver.py
index f3253ec63712a7c792c32feff0ef8cc94f40259e..fa914a452ab35451f3d5cd898ef26efe37725520 100644
--- a/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/lib/ingestjobmanagementserver.py
+++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/lib/ingestjobmanagementserver.py
@@ -147,7 +147,6 @@ class IngestJobManager:
                     msg = self.__incoming_job_queue.receive(timeout=1.0)
                     while msg:
                         logger.debug("received msg on job queue %s: %s", self.__incoming_job_queue.address, msg)
-                        self.__incoming_job_queue.ack(msg)
 
                         if isinstance(msg, CommandMessage):
                             job = parseJobXml(msg.content)
@@ -590,22 +589,32 @@ class IngestJobManager:
             return sorted(list(set([jad['job'].get('job_group_id', 'unknown_group') for jad in list(self.__job_admin_dicts.values())])))
 
     def __putStalledJobsBackToToDo(self):
-        if datetime.utcnow() - self.__last_putStalledJobsBackToToDo_timestamp < timedelta(minutes=1):
+        if datetime.utcnow() - self.__last_putStalledJobsBackToToDo_timestamp < timedelta(seconds=15):
             return
 
+        logger.debug("checking stalled jobs...")
+
         with self.__lock:
-            now = datetime.utcnow()
-            threshold = timedelta(minutes=15)
-            stalled_job_admin_dicts = [jad for jad in list(self.__job_admin_dicts.values())
-                                       if (jad['status'] == JobProducing or jad['status'] == JobScheduled)
-                                       and now - jad['updated_at'] >= threshold]
-
-            for jad in stalled_job_admin_dicts:
-                logger.info('putting job %s back to ToDo because it did not make any progress during the last 15min', jad['job']['JobId'])
+            scheduled_jads = self.getJobAdminDicts(status=JobScheduled)
+            stalled_scheduled_jads = [jad for jad in scheduled_jads
+                                      if datetime.utcnow() - jad['updated_at'] >= timedelta(seconds=30)]
+
+            for jad in stalled_scheduled_jads:
+                logger.info('putting stalled scheduled job %s back to ToDo because it was not picked up in time by a transferservice', jad['job']['JobId'])
+                self.updateJobStatus(jad['job']['JobId'], JobToDo)
+
+            producing_jads = self.getJobAdminDicts(status=JobProducing)
+            stalled_producing_jads = [jad for jad in producing_jads
+                                      if datetime.utcnow() - jad['updated_at'] >= timedelta(minutes=10)]
+
+            for jad in stalled_producing_jads:
+                logger.info('putting stalled producing job %s back to ToDo because it did not make any progress during the last 10 min', jad['job']['JobId'])
                 self.updateJobStatus(jad['job']['JobId'], JobToDo)
 
             self.__last_putStalledJobsBackToToDo_timestamp = datetime.utcnow()
 
+        logger.debug("checked stalled jobs")
+
     def getNextJobToRun(self):
         '''get the next job to run.
         examine all 'to_do' and 'retry' jobs
@@ -635,6 +644,9 @@ class IngestJobManager:
         with self.__lock:
             def getNextJobByStatus(status, min_age=None, exclude_job_group_ids=[]):
 
+                def jad_sort_value_func(jad):
+                    return jad['job'].get('priority', DEFAULT_JOB_PRIORITY)
+
                 def jad_compare_func(jad_a, jad_b):
                     # sort on priority first
                     if jad_a['job'].get('priority', DEFAULT_JOB_PRIORITY) != jad_b['job'].get('priority', DEFAULT_JOB_PRIORITY):
@@ -677,7 +689,7 @@ class IngestJobManager:
                     # filter out jad's from exclude_job_group_ids
                     job_admin_dicts = [jad for jad in job_admin_dicts if 'job_group_id' not in jad['job'] or jad['job']['job_group_id'] not in exclude_job_group_ids]
 
-                job_admin_dicts = sorted(job_admin_dicts, key=cmp_to_key(jad_compare_func))
+                job_admin_dicts = sorted(job_admin_dicts, key=jad_sort_value_func, reverse=True)
                 if job_admin_dicts:
                     logger.info('%s jobs with status %s waiting', len(job_admin_dicts), jobState2String(status))
                     return job_admin_dicts[0]
@@ -758,12 +770,8 @@ class IngestJobManager:
     def canProduceNextJob(self):
         # test if the managed_job_queue is empty enough, and if our administration agrees
         try:
-            with self.__jobs_for_transfer_queue_peeker:
-                num_scheduled = self.__jobs_for_transfer_queue_peeker.nr_of_messages_in_queue(0.01)
-                if num_scheduled <= 1:
-                    scheduled_jads = self.getJobAdminDicts(status=JobScheduled)
-                    return len(scheduled_jads) <= 1
-                return False
+            scheduled_jads = self.getJobAdminDicts(status=JobScheduled)
+            return len(scheduled_jads) < 1
         except Exception as e:
             logger.exception('canProduceNextJob: %s', e)
             if 'No active session' in str(e):
@@ -773,7 +781,7 @@ class IngestJobManager:
 
     def produceNextJobsIfPossible(self):
         start_producing_timestamp = datetime.utcnow()
-        while self.canProduceNextJob() and datetime.utcnow() - start_producing_timestamp < timedelta(seconds=2):
+        while self.canProduceNextJob() and datetime.utcnow() - start_producing_timestamp < timedelta(seconds=5):
             job_admin_dict = self.getNextJobToRun()
             if job_admin_dict:
                 if os.path.exists(job_admin_dict.get('path')):
diff --git a/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/test/t_ingestjobmanagementserver.py b/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/test/t_ingestjobmanagementserver.py
index fb3ebbeffcd56e9a4d05595d5b6c615b12ea030e..dd6e551164cfbb0c4563ddfea3b117b4103e8236 100755
--- a/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/test/t_ingestjobmanagementserver.py
+++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestAdminServer/test/t_ingestjobmanagementserver.py
@@ -86,7 +86,6 @@ with TemporaryQueue(testname+"_ingest_notification_bus") as tmp_queue1, \
                     msg = test_consumer.receive(timeout=1)
 
                     if msg and isinstance(msg, CommandMessage):
-                        test_consumer.ack(msg)
                         job = parseJobXml(msg.content)
                         if job and job.get('JobId'):
                             logger.info("test consumer received job on queue: %s", job)
diff --git a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/ingestpipeline.py b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/ingestpipeline.py
index 8087fa17333686a633012fec3949feac817c01b9..14e0ba9638b4e79e4f949505a4939f44b86003b1 100755
--- a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/ingestpipeline.py
+++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/ingestpipeline.py
@@ -16,6 +16,7 @@ from lofar.lta.ingest.server.ltaclient import *
 from lofar.lta.ingest.server.momclient import *
 from lofar.common.util import humanreadablesize
 from lofar.common import isProductionEnvironment
+from lofar.common.subprocess_utils import communicate_returning_strings
 from lofar.messaging import EventMessage, ToBus
 from lofar.lta.ingest.common.config import DEFAULT_INGEST_NOTIFICATION_BUSNAME, DEFAULT_INGEST_NOTIFICATION_PREFIX
 from lofar.lta.ingest.common.config import hostnameToIp
@@ -257,7 +258,7 @@ class IngestPipeline():
                 cmd = ['ssh', '-tt', '-n', '-x', '-q', '%s@%s' % (self.user, sip_host), 'cat %s' % sip_path]
                 logger.info("GetSIP for %s at SIPLocation %s - cmd %s" % (self.JobId, self.job['SIPLocation'], ' ' .join(cmd)))
                 p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-                out, err = p.communicate()
+                out, err = communicate_returning_strings(p)
                 if p.returncode != 0:
                     raise PipelineError('GetSIP error getting EoR SIP for %s: %s' % (self.JobId, out + err))
 
@@ -300,7 +301,7 @@ class IngestPipeline():
                     cmd = ['ssh', '-tt', '-n', '-x', '-q', '%s@%s' % (self.user, sip_host), 'cat %s' % sip_path]
                     logger.info("GetSIP for %s at SIPLocation %s - cmd %s" % (self.JobId, self.job['SIPLocation'], ' ' .join(cmd)))
                     p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-                    out, err = p.communicate()
+                    out, err = communicate_returning_strings(p)
                     if p.returncode != 0:
                         raise PipelineError('GetSIP error getting EoR SIP for %s: %s' % (self.JobId, out + err))
 
diff --git a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/ingesttransferserver.py b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/ingesttransferserver.py
index a2f4ff178515d552740ce950c309cf9094ce349e..ba108975fc99e334d2b67577a2cea769e93a2f7b 100644
--- a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/ingesttransferserver.py
+++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/ingesttransferserver.py
@@ -28,6 +28,7 @@ import os
 import time
 import socket
 import getpass
+import pprint
 from threading import Thread, Lock
 from lofar.messaging.messagebus import FromBus, ToBus
 from lofar.messaging.messages import *
@@ -42,22 +43,24 @@ from lofar.lta.ingest.server.config import MAX_NR_OF_JOBS, MAX_USED_BANDWITH_TO_
 from lofar.lta.ingest.server.config import TRANSFER_TIMEOUT
 from lofar.lta.ingest.common.job import *
 from lofar.lta.ingest.server.ingestpipeline import IngestPipeline
+from lofar.lta.ingest.client.rpc import IngestRPC
 from lofar.lta.ingest.server.ltaclient import *
 from lofar.lta.ingest.server.momclient import *
-
-try:
-    import psutil
-except ImportError as e:
-    print(str(e))
-    print('Please install python3 package psutil: pip3 install psutil')
-    exit(1)
+import psutil
 
 logger = logging.getLogger(__name__)
 
 def _getBytesSent():
     try:
-        return psutil.net_io_counters(True).get(NET_IF_TO_MONITOR, psutil.net_io_counters(False)).bytes_sent
-    except Exception:
+        # sum the traffic of all interfaces listed in NET_IF_TO_MONITOR
+        counters = psutil.net_io_counters(True)
+        if all(interface in counters for interface in NET_IF_TO_MONITOR):
+            return sum(counters[interface].bytes_sent for interface in NET_IF_TO_MONITOR)
+
+        # not all interfaces found... return total bytes_sent
+        return psutil.net_io_counters(False).bytes_sent
+    except Exception as e:
+        logger.warning("Cannot get network interface info: %s", e)
         return 0
 
 class IngestTransferServer:
@@ -96,7 +99,7 @@ class IngestTransferServer:
         job_id = job_dict['JobId']
 
         if job_id in self.__running_jobs:
-            logger.warning('job %s is already running', job_id)
+            logger.warning('job %s is already running. Discarding this new job copy, and keeping the current one running...', job_id)
             return
 
         def threaded_pipeline_func(job):
@@ -121,11 +124,12 @@ class IngestTransferServer:
 
     def __clearFinishedJobs(self):
         try:
-            finished_job_ids = [job_id for job_id, job_thread_dict in list(self.__running_jobs.items()) if not job_thread_dict['thread'].is_alive()]
+            with self.__lock:
+                finished_job_ids = [job_id for job_id, job_thread_dict in list(self.__running_jobs.items()) if not job_thread_dict['thread'].is_alive()]
 
-            for job_id in finished_job_ids:
-                logger.info('removing finished job %s', job_id)
-                del self.__running_jobs[job_id]
+                for job_id in finished_job_ids:
+                    logger.info('removing finished job %s', job_id)
+                    del self.__running_jobs[job_id]
         except Exception as e:
             logger.error('__clearFinishedJobs: %s', e)
 
@@ -136,10 +140,9 @@ class IngestTransferServer:
             # so the log won't be flooded
             def log_recource_warning(message):
                 if self.__log_recource_warning:
-                    logger.warn(message)
-                    self.__log_recource_warning = False
+                    logger.warning("resources: %s", message)
+                    #self.__log_recource_warning = False
 
-            import pprint
             now = datetime.utcnow()
             bytes_sent = _getBytesSent()
 
@@ -150,6 +153,8 @@ class IngestTransferServer:
                 # running average for used_bandwidth
                 used_bandwidth = 0.5 * speed + 0.5 * self.__prev_used_bandwidth
 
+                logger.debug("resources: current used_bandwidth = %s", humanreadablesize(used_bandwidth, 'bps'))
+
                 # store for next iteration
                 self.__prev_bytes_sent = bytes_sent
                 self.__prev_bytes_sent_timestamp = now
@@ -170,20 +175,28 @@ class IngestTransferServer:
                 self.__prev_bytes_sent_timestamp = now
 
             # only start new jobs if we have some cpu time available
-            if psutil.cpu_times_percent().idle < 5:
+            idle_cpu_percentage = psutil.cpu_times_percent().idle
+            logger.debug("resources: current idle_cpu_percentage = %s%%", idle_cpu_percentage)
+            if idle_cpu_percentage < 5:
                 log_recource_warning('not enough cpu power available to start new jobs, cpu_idle %s%%' %
-                                     psutil.cpu_times_percent().idle)
+                                     idle_cpu_percentage)
                 return False
 
             # only start new jobs if system load is not too high
-            if os.getloadavg()[0] > 1.5 * psutil.cpu_count():
+            short_load_avg = os.getloadavg()[0]
+            cpu_count = psutil.cpu_count()
+            allowed_load = 1.5 * cpu_count
+            logger.debug("resources: current short term load = %s #cpu's = %s allowed_load = %s", short_load_avg, cpu_count, allowed_load)
+            if short_load_avg > allowed_load:
                 log_recource_warning('system load too high (%s > %s), cannot start new jobs' %
-                                     (os.getloadavg()[0],
-                                      1.5 * psutil.cpu_count()))
+                                     (short_load_avg,
+                                      allowed_load))
                 return False
 
             # only allow 1 job at the time if swapping
-            if psutil.swap_memory().percent > 5 and len(self.__running_jobs) > 0:
+            swap_memory_percentage = psutil.swap_memory().percent
+            logger.debug("resources: current swap_memory_percentage = %s%%", swap_memory_percentage)
+            if swap_memory_percentage > 5 and len(self.__running_jobs) > 0:
                 log_recource_warning('system swapping. not enough memory available to start new jobs')
                 return False
 
@@ -191,13 +204,19 @@ class IngestTransferServer:
             try:
                 current_user = getpass.getuser()
                 current_user_procs = [p for p in psutil.process_iter() if p.username() == current_user]
-                if len(current_user_procs) > 64 * psutil.cpu_count():
+                current_num_user_procs = len(current_user_procs)
+                allowed_num_user_procs = 64 * cpu_count
+
+                logger.debug("resources: current num_user_procs = %s allowed_num_user_procs = %s", current_num_user_procs, allowed_num_user_procs)
+
+                if current_num_user_procs > allowed_num_user_procs:
                     log_recource_warning('number of processes by %s too high (%s > %s), cannot start new jobs' %
                                         (current_user,
-                                        len(current_user_procs),
-                                        64 * psutil.cpu_count()))
+                                        current_num_user_procs,
+                                        allowed_num_user_procs))
                     return False
-            except:
+            except Exception as e:
+                logger.exception(e)
                 pass
 
             # limit total number of parallel transferring jobs to self.max_nr_of_parallel_jobs
@@ -208,7 +227,13 @@ class IngestTransferServer:
                 transferring_pipelines = [pipeline for pipeline in pipelines if pipeline.status == IngestPipeline.STATUS_TRANSFERRING]
                 finalizing_pipelines = [pipeline for pipeline in pipelines if pipeline.status == IngestPipeline.STATUS_FINALIZING]
 
-                if len(starting_threads) + len(initializing_pipelines) + len(transferring_pipelines) >= self.max_nr_of_parallel_jobs:
+                num_busy_transfers = len(starting_threads) + len(initializing_pipelines) + len(transferring_pipelines)
+                num_finalizing_transfers = len(finalizing_pipelines)
+
+                logger.debug("resources: current num_busy_transfers = %s num_finalizing_transfers = %s max_nr_of_parallel_jobs = %s",
+                             num_busy_transfers, num_finalizing_transfers, self.max_nr_of_parallel_jobs)
+
+                if num_busy_transfers >= self.max_nr_of_parallel_jobs:
                     log_recource_warning('already running %d parallel jobs (#starting=%d, #transferring=%d) limiting the total number of transferring jobs to %d' %
                                         (len(self.__running_jobs),
                                          len(initializing_pipelines) + len(starting_threads),
@@ -216,15 +241,21 @@ class IngestTransferServer:
                                          self.max_nr_of_parallel_jobs))
                     return False
 
-                if len(finalizing_pipelines) >= 2 * self.max_nr_of_parallel_jobs:
+                if num_finalizing_transfers >= 2 * self.max_nr_of_parallel_jobs:
                     log_recource_warning('already waiting for %d jobs to finish (updating status/SIP to MoM and LTA). not starting new jobs until some jobs finished...' %
                                         (len(finalizing_pipelines),))
                     return False
 
         except Exception as e:
-            logger.error(e)
-            # unknown error, run 1 job at a time
-            return len(self.__running_jobs) == 0
+            logger.exception("error while checking for available resources: %s", e)
+
+            num_running_jobs = len(self.__running_jobs)
+            if num_running_jobs <= 4:
+                logger.info("running %d jobs, assuming we can run 1 more: ", num_running_jobs)
+                return True
+            else:
+                logger.warning("already running %d jobs, assuming for safety we cannot run more jobs...", num_running_jobs)
+                return False
 
         return True
 
@@ -235,10 +266,10 @@ class IngestTransferServer:
                 try:
                     try:
                         if self.__enoughResourcesAvailable():
-                            msg = job_frombus.receive(timeout = 60)
+                            logger.info("enough resources available to start new jobs. waiting for new job on %s", job_frombus.address)
+                            msg = job_frombus.receive(timeout = 10)
                             if msg:
-                                logger.debug("received msg on job queue: %s", msg)
-                                job_frombus.ack(msg)
+                                logger.info("received msg on job queue: %s", msg)
 
                                 if isinstance(msg, CommandMessage):
                                     job_dict = parseJobXml(msg.content)
@@ -249,7 +280,10 @@ class IngestTransferServer:
                                     # allow 1 new recource_warning to be logged
                                     self.__log_recource_warning = True
                                 else:
-                                    logger.warn("unexpected message type: %s", msg)
+                                    logger.warning("unexpected message type: %s", msg)
+                        else:
+                            # wait for resources to become available
+                            time.sleep(5)
                     except KeyboardInterrupt:
                         break
                     except Exception as e:
@@ -273,9 +307,7 @@ class IngestTransferServer:
                             if self.__prev_used_bandwidth > 0.85 * MAX_USED_BANDWITH_TO_START_NEW_JOBS:
                                 time.sleep(1.0)
 
-                        if datetime.utcnow() - self.__running_jobs_log_timestamp > timedelta(seconds = 10):
-                            self.__running_jobs_log_timestamp = datetime.utcnow()
-
+                        if datetime.utcnow() - self.__running_jobs_log_timestamp > timedelta(seconds = 2):
                             with self.__lock:
                                 starting_threads = [job_thread_dict['thread'] for job_thread_dict in list(self.__running_jobs.values()) if 'pipeline' not in job_thread_dict]
                                 pipelines = [job_thread_dict['pipeline'] for job_thread_dict in list(self.__running_jobs.values()) if 'pipeline' in job_thread_dict]
@@ -284,7 +316,7 @@ class IngestTransferServer:
                                 finalizing_pipelines = [pipeline for pipeline in pipelines if pipeline.status == IngestPipeline.STATUS_FINALIZING]
                                 finished_pipelines = [pipeline for pipeline in pipelines if pipeline.status == IngestPipeline.STATUS_FINISHED]
 
-                                status_log_line = "running %s jobs: #starting=%d, #transferring=%d, #finalizing=%d, #finished=%d, bandwith used on network interface %s %s (%s), load=%.1f" % (len(self.__running_jobs),
+                                status_log_line = "status: running %s jobs: #starting=%d, #transferring=%d, #finalizing=%d, #finished=%d, bandwith used on network interface(s) %s %s (%s), load=%.1f" % (len(self.__running_jobs),
                                        len(initializing_pipelines) + len(starting_threads),
                                        len(transferring_pipelines),
                                        len(finalizing_pipelines),
@@ -294,13 +326,34 @@ class IngestTransferServer:
                                        humanreadablesize(self.__prev_used_bandwidth / 8, 'Bps'),
                                        os.getloadavg()[0])
 
-                                logger.info(status_log_line)
+                            logger.info(status_log_line)
+                            self.__running_jobs_log_timestamp = datetime.utcnow()
+
+                            msg = EventMessage(context = self.notification_prefix + 'TransferServiceStatus',
+                                                content = { 'ingest_server': socket.gethostname(),
+                                                            'message' : status_log_line })
+                            msg.ttl = 3600    # remove message from queues when not picked up within 1 hour
+                            self.event_bus.send(msg)
+
+                            # HACK: for some unknown reason sometimes the ingesttransferserver does not pick up new jobs...
+                            # When there are no running jobs, and the jobmanager reports that there are jobs to do,
+                            # then do a hard exit, so supervisor restarts this ingesttransferserver automatically
+                            # which solves the problem.
+                            # Yes, it's ugly, but for now it works
+                            if len(self.__running_jobs) == 0:
+                                with IngestRPC() as ingest_rpc:
+                                    report = ingest_rpc.getStatusReport()
+                                    num_unfinished_jobs = sum((x.get('to_do', 0) +
+                                                               x.get('scheduled', 0) +
+                                                               x.get('retry', 0)) for x in report.values())
+                                    if num_unfinished_jobs > 0:
+                                        # try to reconnect first...
+                                        if not job_frombus.reconnect(num_retries=5, retry_wait_time=10):
+                                            # no success, so exit, relying on supervisor-triggered restart
+                                            time.sleep(10) # sleep a little first, so the exit/restart cycle isn't spinning fast.
+                                            logger.warning("Forcing a hard restart because there are jobs to do, but this ingesttransferserver is not picking them up...")
+                                            exit(1)
 
-                                msg = EventMessage(context = self.notification_prefix + 'TransferServiceStatus',
-                                                   content = { 'ingest_server': socket.gethostname(),
-                                                             'message' : status_log_line })
-                                msg.ttl = 3600    # remove message from queue's when not picked up within 1 hours
-                                self.event_bus.send(msg)
 
                     except KeyboardInterrupt:
                         break
@@ -312,6 +365,7 @@ class IngestTransferServer:
                 except Exception as e:
                     logger.error(e)
 
+
 def main():
     # make sure we run in UTC timezone
     import os
diff --git a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/momclient.py b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/momclient.py
index 35585189fb10a93484f02eee1dcfee3694ea41a4..55c1ed02374d3c810867c1472912c87796fd0c14 100755
--- a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/momclient.py
+++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/momclient.py
@@ -67,6 +67,7 @@ class MoMClient:
             if self.__logged_in:
                 return
 
+            logger.debug("logging in to MoM on url: %s", self.__momURLlogin)
             self.__browser.open(self.__momURLlogin)
             forms = list(self.__browser.forms())
             self.__browser.form = forms[0]
@@ -76,14 +77,18 @@ class MoMClient:
             if 200 != ret_code:
                 raise Exception("Logging into MoM failed: http return code = " + ret_code)
 
+            logger.debug("logged in on MoM on url: %s", self.__momURLlogin)
             self.__logged_in = True
         except Exception as e:
             raise Exception("Logging into MoM on %s failed: %s" % (self.__momURLlogin, str(e)))
 
     def logout(self):
         try:
-            self.__browser.open(self.__momURLlogout)
-            self.__logged_in = False
+            if self.__logged_in:
+                logger.info("logging out of MoM on url: %s", self.__momURLlogout)
+                self.__browser.open(self.__momURLlogout)
+                self.__logged_in = False
+                logger.info("logged out of MoM on url: %s", self.__momURLlogout)
         except Exception as e:
             logger.warning("Logging out of MoM failed: " + str(e))
 
@@ -100,14 +105,13 @@ class MoMClient:
             # often it returns a login page, even when you're logged in
             # so, upon error, retry a couple of times with a pause, else just return
             for mom_retry in range(self.MAX_MOM_RETRIES):
-                if not self.__logged_in:
-                    self.login()
+                self.login()
 
                 params = {"exportId" : export_id, "status" : status_id}
                 statusUrl = self.__momURLsetStatus + '?' + urllib.parse.urlencode(params)
-                logger.debug("updating MoM: " + statusUrl)
+                logger.info("updating MoM: " + statusUrl)
                 response = self.__browser.open(statusUrl)
-                reply = response.readlines()
+                reply = [line.decode('utf-8') for line in response.readlines()]
                 if reply == ['ok']:
                     logger.info('MoMClient.setStatus updated status of %s to %s', export_id, jobState2String(int(status_id)))
 
@@ -123,9 +127,9 @@ class MoMClient:
                         params['message'] = message
 
                         statusUrl = self.__momURLsetStatus + '?' + urllib.parse.urlencode(params)
-                        logger.debug("updating MoM: " + statusUrl)
+                        logger.info("updating MoM: " + statusUrl)
                         response = self.__browser.open(statusUrl)
-                        reply = response.readlines()
+                        reply = [line.decode('utf-8') for line in response.readlines()]
                         if reply == ['ok']:
                             logger.info('MoMClient.setStatus updated status of %s to %s with message: %s',
                                         export_id,
@@ -166,8 +170,7 @@ class MoMClient:
                 start = time.time()
                 logger.info("MoMClient.uploadDataAndGetSIP with archiveId %s - StorageTicket %s - FileName %s - Uri %s", archive_id, storage_ticket, filename, uri)
 
-                if not self.logged_in:
-                    self.login()
+                self.login()
 
                 xmlcontent = """<?xml version="1.0" encoding="UTF-8"?>
                 <lofar:DataProduct archiveId="%s" xmlns:lofar="http://www.astron.nl/MoM2-Lofar">
@@ -198,7 +201,7 @@ class MoMClient:
                 data = urllib.parse.urlencode({"command" : "get-sip-with-input", "xmlcontent" : xmlcontent})
                 # Now get that file-like object again, remembering to mention the data.
                 response = self.__browser.open(self.__momURLgetSIP, data)
-                result = response.read()
+                result = response.read().decode('utf-8')
                 result = result.replace('<stationType>Europe</stationType>', '<stationType>International</stationType>')
 
                 if 'DOCTYPE HTML PUBLIC' in result:
@@ -264,8 +267,7 @@ class MoMClient:
         # so, upon error, retry a couple of times with a pause, else just return
         for mom_retry in range(self.MAX_MOM_RETRIES):
             try:
-                if not self.logged_in:
-                    self.login()
+                self.login()
 
                 mom_id = archive_id - 1000000    # stupid mom one million archive_id offset
 
@@ -273,7 +275,7 @@ class MoMClient:
                 # Now get that file-like object again, remembering to mention the data.
                 logger.info('%s: GetSip call: %s %s', log_prefix, self.__momURLgetSIP, data)
                 response = self.__browser.open(self.__momURLgetSIP, data)
-                result = response.read()
+                result = response.read().decode('utf-8')
 
                 if 'DOCTYPE HTML PUBLIC' in result:
                     logger.error('%s: MoM returned login screen instead of SIP for archive_id=%s mom_id=%s using url %s and data %s',
diff --git a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/sip.py b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/sip.py
index cec18b954f5c42479fc05509461fc2a85f967c1a..7c42d02ca0d8e9ca5522d16c1afe055d40aca316 100755
--- a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/sip.py
+++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/sip.py
@@ -28,7 +28,7 @@ def validateSIPAgainstSchema(sip, log_prefix=''):
         with open(sip_xsd_path) as xsd_file:
             xsd_contents = etree.parse(xsd_file)
             schema       = etree.XMLSchema(xsd_contents)
-            sip_io       = BytesIO(sip) if isinstance(sip, bytes) else StringIO(sip)
+            sip_io       = BytesIO(sip if isinstance(sip, bytes) else sip.encode('utf-8'))
             sip_xml      = etree.parse(sip_io)
             result       = schema.validate(sip_xml)
             if time.time() - start > 1:
@@ -52,7 +52,7 @@ def checkSIPContent(sip, archive_id=None, filename=None, storage_ticket=None, fi
         logger.debug("%scheckSIPContent starting", log_prefix)
         start  = time.time()
 
-        sip_io   = StringIO(sip)
+        sip_io       = BytesIO(sip if isinstance(sip, bytes) else sip.encode('utf-8'))
         xml_tree     = etree.parse(sip_io)
         xml_root     = xml_tree.getroot()
 
@@ -136,7 +136,7 @@ def addIngestInfoToSIP(sip, storage_ticket, filesize, md5_checksum, adler32_chec
                 storage_ticket, filesize, md5_checksum, adler32_checksum)
 
     from xml.dom import minidom
-    sip_dom = minidom.parseString(sip)
+    sip_dom = minidom.parseString(sip.decode('utf-8') if isinstance(sip, bytes) else sip)
     dp_node = sip_dom.getElementsByTagName('dataProduct')[0]
 
     for elem in dp_node.getElementsByTagName('storageTicket'):
diff --git a/LTA/LTAIngest/LTAIngestServer/LTAIngestWebServer/lib/ingestwebserver.py b/LTA/LTAIngest/LTAIngestServer/LTAIngestWebServer/lib/ingestwebserver.py
index be105f09dacbf151c45636065b6859889ef12ebf..b2e55c7c475edce1923bfa60af32c7ca10241e44 100644
--- a/LTA/LTAIngest/LTAIngestServer/LTAIngestWebServer/lib/ingestwebserver.py
+++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestWebServer/lib/ingestwebserver.py
@@ -22,6 +22,7 @@
 import logging
 import sys
 import os, os.path
+from functools import cmp_to_key
 from datetime import datetime, timedelta
 from lofar.common.util import humanreadablesize
 from lofar.common import isDevelopmentEnvironment
@@ -79,7 +80,7 @@ def index():
 
         return 0
 
-    sorted_items = sorted(list(report.items()), cmp = compare_func)
+    sorted_items = sorted(list(report.items()), key=cmp_to_key(compare_func))
 
     nr_of_jobs_in_queue = 0
     for status_dict in list(report.values()):
diff --git a/MAC/APL/APLCommon/src/swlevel b/MAC/APL/APLCommon/src/swlevel
index cb601b78b9a84b97c91bed89571fe8b146111978..cbeef05c92b2a90a7c135c34e355cf8e259d83b0 100755
--- a/MAC/APL/APLCommon/src/swlevel
+++ b/MAC/APL/APLCommon/src/swlevel
@@ -47,10 +47,47 @@ LEVELTABLE=${ETCDIR}/swlevel.conf
 function getpid {
   PROGRAM="$1"
 
-  # use "pgrep -f" to get the PID of scripts.
-  # only use this as a fall-back when "pgrep" itself returns nothing,
-  # since "pgrep -f" returns false positives for non-scripts.
-  pgrep $PROGRAM || pgrep -f $PROGRAM
+  # Linux tools seem to fail here, since we want to match both
+  # programs and scripts on name:
+  #
+  # pgrep $PROGRAM       : Only matches executables, not scripts.
+  # pgrep -f $PROGRAM    : Matches the program anywhere in the command line, is way too broad
+  # pidof                : Only matches executables, not scripts.
+  # pidof -x             : Cannot match scripts started with #!/usr/bin/env as that changes the executable name used
+
+ps --no-headers -eo pid,cmd | python3 -c '
+import sys
+from os.path import basename
+
+if len(sys.argv) < 2:
+  print("Usage: ps --no-headers -eo pid,cmd | %s progname" % sys.argv[0])
+  sys.exit(1)
+
+PROG=sys.argv[1]
+
+# names of commands that are actually script interpreters
+SCRIPT_INTERPRETERS=["python","python2","python3","bash","sh","tcsh"]
+
+pids = []
+
+for l in sys.stdin.readlines():
+  pid, cmdline = l.split(None, 1)
+
+  # all command-line arguments, but drop any that start with a DASH (except if it is the program name)
+  cmdargs = [a for nr,a in enumerate(cmdline.split()) if nr == 0 or not a.startswith("-")]
+
+  # PROG is invoked directly
+  if basename(cmdargs[0]) == PROG:
+    pids += [pid]
+  # PROG is the first command-line argument to a script
+  elif len(cmdargs) > 1 and basename(cmdargs[0]) in SCRIPT_INTERPRETERS and basename(cmdargs[1]) == PROG:
+    pids += [pid]
+
+if pids:
+  print(" ".join(pids))
+
+sys.exit(0 if pids else 1)
+' "$PROGRAM"
 }
 
 # Counter to indicate if 48V reset has been attempted
diff --git a/MAC/APL/CURTDBDaemons/src/SoftwareMonitor/SoftwareMonitor.cc b/MAC/APL/CURTDBDaemons/src/SoftwareMonitor/SoftwareMonitor.cc
index 47ad8227a36264752bb9931d86fbcd87ef74b6cc..bdaa3531a73d50082cdc4c1eb7fb1208e2bed6e0 100644
--- a/MAC/APL/CURTDBDaemons/src/SoftwareMonitor/SoftwareMonitor.cc
+++ b/MAC/APL/CURTDBDaemons/src/SoftwareMonitor/SoftwareMonitor.cc
@@ -598,7 +598,9 @@ void SoftwareMonitor::_buildProcessMap()
 		snprintf(statFile, sizeof statFile, "/proc/%s/cmdline", dirPtr->d_name);
 		if ((fd = open(statFile, O_RDONLY)) != -1) {
 			if (read(fd, statBuffer, STAT_BUFFER_SIZE-1)) {
-				if (basename(statBuffer) == "python") {
+				if (basename(statBuffer) == "python" 
+                                 || basename(statBuffer) == "python2"
+                                 || basename(statBuffer) == "python3") {
 					// skip python name and try to find real program
 					// note: between python and its argument in a \0 in the cmdline file,
 					//       use lseek to skip the 'python' part of the commandline.
diff --git a/MAC/APL/PIC/RSP_Driver/src/CDOWrite.cc b/MAC/APL/PIC/RSP_Driver/src/CDOWrite.cc
index 4087a3751c88c7e5e68f0acaa8cc333bf80577a8..6b06912b2012fe68f8c5d073257200479a737f49 100644
--- a/MAC/APL/PIC/RSP_Driver/src/CDOWrite.cc
+++ b/MAC/APL/PIC/RSP_Driver/src/CDOWrite.cc
@@ -102,17 +102,6 @@ void CDOWrite::setup_udpip_header(uint32 l_srcip, uint32 l_dstip, uint16 l_dstpo
   m_udpip_hdr.ip.srcip        = htonl(l_srcip);
   m_udpip_hdr.ip.dstip        = htonl(l_dstip);
 
-	// compute header checksum
-	/*
-	stringstream ss;
-	char *from = static_cast<char*>(&m_udpip_hdr.ip);
-	char *to   = static_cast<char*>(&m_udpip_hdr.ip) + sizeof m_udpip_hdr.ip;
-
-        for( char *p = from; p < to; p++)
-          ss << str(format("%02x ") % *p);
-  LOG_INFO_STR("IP Header before calculation: " << ss.str());
-        */
-
 	m_udpip_hdr.ip.hdrchksum = compute_ip_checksum(m_udpip_hdr);
   LOG_INFO(str(format("IP Header checksum: %04x (calculated over %d bytes)") % static_cast<uint16>(m_udpip_hdr.ip.hdrchksum) % sizeof m_udpip_hdr.ip));
 
@@ -137,6 +126,20 @@ CDOWrite::~CDOWrite()
 
 uint16 CDOWrite::compute_ip_checksum(const CDOWrite::UDPIPType &header)
 {
+        /* WARNING:
+         * 
+         * The log output in this function is needed to force the compiler
+         * into the right execution order. This needs to be properly investigated
+         * and fixed, but until then, make sure this function keeps returning the
+         * proper value in setup_udpip_header.
+         *
+         * The incorrect behaviour emerged when switching compilers and moving to C++11.
+         * It makes this function return alternatingly the right checksum and 0 for
+         * subsequent calls to setup_udpip_header.
+         *
+         * The zero checksum occurs naturally if the correct checksum is already
+         * filled into the checksum field.
+         */
 	const void *addr = &header.ip;
 	int count = sizeof header.ip;
 
diff --git a/MAC/Tools/Power/ec_set_observing.py b/MAC/Tools/Power/ec_set_observing.py
index 2b4fed861b8f0b841a26270c8188d954ea18c51e..ad43b065cdae70f009755633ff0016aeba60908d 100755
--- a/MAC/Tools/Power/ec_set_observing.py
+++ b/MAC/Tools/Power/ec_set_observing.py
@@ -47,6 +47,7 @@ def cmd(command):
     cmd_list = command.split()
     proc = subprocess.Popen(cmd_list, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     (so, se) = proc.communicate()
+    so = so.decode("UTF-8")
     return(so)
 
 # start main()
diff --git a/MAC/Tools/Rubidium/rr.py b/MAC/Tools/Rubidium/rr.py
index 7170fa2c077ac0e35425349860d3b3981d7b49dd..c59e19a3d477bfeff1f5045bb3146d6f09fbaf75 100755
--- a/MAC/Tools/Rubidium/rr.py
+++ b/MAC/Tools/Rubidium/rr.py
@@ -32,7 +32,7 @@ def checkSettings(fp):
         cmd = 'stty < /dev/rubidium'
         cf = Popen(cmd, shell = True, stdout = PIPE, stderr = PIPE)
         (result, resultErr) = cf.communicate()
-        print(result, resultErr)
+        print(result.decode("UTF-8"), resultErr.decode("UTF-8"))
     else:
         return True
 
@@ -43,7 +43,7 @@ def callCommand(cmd):
     cf = Popen(cmd1, shell = True, stdout = PIPE, stderr = PIPE)
         
     (res,resErr) = cf.communicate()
-    result = res
+    result = res.decode("UTF-8")
     return result
 
 
diff --git a/MAC/Tools/Rubidium/rubidium_logger_centos7.py b/MAC/Tools/Rubidium/rubidium_logger_centos7.py
index bbc29c4cddc4c08bee6e7b59bde7f2b3b4695472..f3af7f50692247d688b1ca2ee7844e23f8c50782 100755
--- a/MAC/Tools/Rubidium/rubidium_logger_centos7.py
+++ b/MAC/Tools/Rubidium/rubidium_logger_centos7.py
@@ -55,7 +55,7 @@ def checkSettings(fp):
         cmd = 'timeout -k 5s 5s stty < /dev/rubidium'
         cf = Popen(cmd, shell = True, stdout = PIPE, stderr = PIPE)
         (result, resultErr) = cf.communicate()
-        print(result, resultErr)
+        print(result.decode("UTF-8"), resultErr.decode("UTF-8"))
     else:
         return True
 
@@ -68,6 +68,7 @@ def callCommand(cmd):
     cf = Popen(cmd1, shell = True, stdout = PIPE, stderr = PIPE)
         
     (res,resErr) = cf.communicate()
+    res = res.decode("UTF-8")
     if cf.returncode != -1 and len(res)>0:
         result = res
     else:
@@ -188,6 +189,7 @@ def getHostName():
     cf = Popen(cmd, shell = True, stdout = PIPE, stderr = PIPE)
         
     (res,resErr) = cf.communicate()
+    res = res.decode("UTF-8")
     if cf.returncode != -1 and len(res)>0:
         result = res.strip()
     else:
@@ -298,7 +300,7 @@ class MyTimedRotatingFileHandler(logging.handlers.TimedRotatingFileHandler):
    logging.handlers.TimedRotatingFileHandler.__init__(self,filename, when='midnight', interval=1, backupCount=0, encoding=None)
    cmd = 'timeout 5s ln -fs ' + filename + ' ' + self.dir_log
    cf = Popen(cmd, shell = True, stdout = PIPE, stderr = PIPE)
-   (res,resErr) = cf.communicate()
+   cf.communicate()
    os.chmod(filename,0o644)
    
   def doRollover(self):
@@ -318,7 +320,7 @@ class MyTimedRotatingFileHandler(logging.handlers.TimedRotatingFileHandler):
 
    cmd = 'timeout 5s ln -fs ' + self.baseFilename + ' ' + self.dir_log
    cf = Popen(cmd, shell = True, stdout = PIPE, stderr = PIPE)
-   (res,resErr) = cf.communicate()
+   cf.communicate()
    os.chmod(self.baseFilename,0o644)
    self.rolloverAt = self.rolloverAt + self.interval
 
diff --git a/RTCP/Cobalt/CoInterface/test/tRingCoordinates.py b/RTCP/Cobalt/CoInterface/test/tRingCoordinates.py
index 1ade3829ede1e07104fb37a31e11df43e2e0a0bb..d8bb45e9701390245d1ae6fcd9ade5e65bbe3699 100755
--- a/RTCP/Cobalt/CoInterface/test/tRingCoordinates.py
+++ b/RTCP/Cobalt/CoInterface/test/tRingCoordinates.py
@@ -162,7 +162,7 @@ def getCPPValue(nrings, width, center, type="J2000"):
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
   # wait till end get the cout, cerr and exitvalue
-  (stdoutdata, stderrdata) = process.communicate()
+  (stdoutdata, stderrdata) = [b.decode("UTF-8") for b in process.communicate()]
   exit_status = process.returncode
 
   # exit != 0
diff --git a/RTCP/Cobalt/GPUProc/test/Kernels/tKernelPerformance.py b/RTCP/Cobalt/GPUProc/test/Kernels/tKernelPerformance.py
index b4b351f4a30708e6dddc456deaccc2aea883c685..2e8f252ff378cde17398fa3fda59a273c0b553c8 100644
--- a/RTCP/Cobalt/GPUProc/test/Kernels/tKernelPerformance.py
+++ b/RTCP/Cobalt/GPUProc/test/Kernels/tKernelPerformance.py
@@ -28,7 +28,7 @@ def runAndGetCerrCout(cmd):
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
   # wait till end get the cout, cerr and exitvalue
-  (stdoutdata, stderrdata) = process.communicate()
+  (stdoutdata, stderrdata) = [b.decode("UTF-8") for b in process.communicate()]
   exit_status = process.returncode
 
   # exit != 0
diff --git a/SAS/DataManagement/Cleanup/AutoCleanupService/autocleanupservice b/SAS/DataManagement/Cleanup/AutoCleanupService/autocleanupservice
index b7d3581a426a293b8abe9c1573d84e7e5956afda..9d7973a621f824ab2b505c70c6a076da3705034d 100755
--- a/SAS/DataManagement/Cleanup/AutoCleanupService/autocleanupservice
+++ b/SAS/DataManagement/Cleanup/AutoCleanupService/autocleanupservice
@@ -67,6 +67,15 @@ class AutoCleanupIngestBusListener(IngestBusListener):
         self.__curpc.close()
         super(AutoCleanupIngestBusListener, self).stop_listening()
 
+    def onJobStarted(self, job_dict):
+        self._logJobNotification('started ', job_dict);
+
+    def onJobFinished(self, job_dict):
+        self._logJobNotification('finished', job_dict);
+
+    def onJobFailed(self, job_dict):
+        self._logJobNotification('failed  ', job_dict, level=logging.WARN);
+
     def onTaskProgress(self, task_dict):
         self._logJobNotification('task progress', task_dict);
 
@@ -209,12 +218,9 @@ def main():
                       help='Subject(s) to listen for on the ingest notification queue on the qpid broker, default: %default')
     (options, args) = parser.parse_args()
 
-    setQpidLogLevel(logging.INFO)
     logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                         level=logging.INFO)
 
-    logging.getLogger('lofar.mom.momqueryservice.momqueryrpc').level = logging.WARN
-
     logger.info('Starting AutoCleanupIngestBusListener...')
 
     with AutoCleanupIngestBusListener(notification_queue_name=options.ingest_notification_queuename,
diff --git a/SAS/DataManagement/StorageQueryService/cache.py b/SAS/DataManagement/StorageQueryService/cache.py
index 5276c28dfafe0fb5b76ec48393442a8aa8bb4964..1aa86003b064e1109e5f4a30d741eb2be6cdcb1f 100644
--- a/SAS/DataManagement/StorageQueryService/cache.py
+++ b/SAS/DataManagement/StorageQueryService/cache.py
@@ -343,7 +343,7 @@ class CacheManager:
                 self._updateCEP4CapacitiesInRADB()
 
                 #sleep for a while, (or stop if requested)
-                for i in range(10):
+                for i in range(60):
                     sleep(1)
                     if not self._cacheThreadsRunning:
                         logger.info('exiting _updateCacheThread')
diff --git a/SAS/DataManagement/StorageQueryService/service.py b/SAS/DataManagement/StorageQueryService/service.py
index 8e6214eba234f1d97b6710254be36dfa901a61f1..6dd18a157cb1954de3c970ce918e540567566807 100644
--- a/SAS/DataManagement/StorageQueryService/service.py
+++ b/SAS/DataManagement/StorageQueryService/service.py
@@ -67,7 +67,7 @@ def createService(busname=DEFAULT_BUSNAME, servicename=DEFAULT_SERVICENAME, brok
                    busname=busname,
                    broker=broker,
                    use_service_methods=True,
-                   numthreads=4,
+                   numthreads=1,
                    verbose=verbose,
                    handler_args={'mountpoint': mountpoint,
                                  'radb_busname':RADB_BUSNAME,
diff --git a/SAS/MoM/MoMQueryService/MoMQueryServiceServer/momqueryservice b/SAS/MoM/MoMQueryService/MoMQueryServiceServer/momqueryservice
index 848554123683fc74ba0c82766ca43225a51c17d6..455da6361bed569a2a64080867e0fb949d381571 100755
--- a/SAS/MoM/MoMQueryService/MoMQueryServiceServer/momqueryservice
+++ b/SAS/MoM/MoMQueryService/MoMQueryServiceServer/momqueryservice
@@ -4,12 +4,8 @@
 '''
 runs the momqueryservice
 '''
-import logging
-
-logger = logging.getLogger(__name__)
 
 from lofar.mom.momqueryservice.momqueryservice import main
 
 if __name__ == '__main__':
-    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.INFO)
     main()
diff --git a/SAS/MoM/MoMQueryService/MoMQueryServiceServer/momqueryservice.py b/SAS/MoM/MoMQueryService/MoMQueryServiceServer/momqueryservice.py
index 384fa8f70c02d6d37ab357d4b7c919e505d269f7..b8583106b8333cca71f2614b62c754a5b20b56fe 100755
--- a/SAS/MoM/MoMQueryService/MoMQueryServiceServer/momqueryservice.py
+++ b/SAS/MoM/MoMQueryService/MoMQueryServiceServer/momqueryservice.py
@@ -94,24 +94,49 @@ def _toIdsString(ids):
 
 
 class MoMDatabaseWrapper:
-    """handler class for details query in mom db"""
+    """Handler class for details query in mom db.
+
+       Note that transactions are NOT supported."""
     def __init__(self, dbcreds):
         self.dbcreds = dbcreds
         self.conn = None
+        self.cursor = None
 
         self.useradministration_db = dbcreds.config["useradministration_database"]
         self.momprivilege_db = dbcreds.config["momprivilege_database"]
 
-    def _connect(self):
-        if self.conn:
+    def connect(self):
+        if self.conn is None:
+            connect_options = self.dbcreds.mysql_connect_options()
+            connect_options['connection_timeout'] = 5
+
+            logger.info("Connecting to %s", self.dbcreds.stringWithHiddenPassword())
+            self.conn = connector.connect(**connect_options)
+            logger.info("Connected to %s", self.dbcreds.stringWithHiddenPassword())
+
+            # Make sure we get fresh data for each SELECT. Alternatively, we could call
+            # commit() after each SELECT.
+            #
+            # Note that we can only set the transaction isolation level if there is no
+            # transaction going on (uncommitted reads or writes).
+            self.cursor = self.conn.cursor(dictionary=True)
+            self.cursor.execute("SET TRANSACTION ISOLATION LEVEL READ COMMITTED");
+
+    def disconnect(self):
+        if self.conn is not None:
+            logger.info("Disconnecting from %s", self.dbcreds.stringWithHiddenPassword())
+            self.cursor.close()
+            self.cursor = None
             self.conn.close()
+            self.conn = None
+            logger.info("Disconnected from %s", self.dbcreds.stringWithHiddenPassword())
 
-        connect_options = self.dbcreds.mysql_connect_options()
-        connect_options['connection_timeout'] = 5
+    def __enter__(self):
+        self.connect()
+        return self
 
-        logger.info("Connecting to %s", self.dbcreds.stringWithHiddenPassword())
-        self.conn = connector.connect(**connect_options)
-        logger.debug("Connected to %s", self.dbcreds.stringWithHiddenPassword())
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.disconnect()
 
     def _executeSelectQuery(self, query, data=None):
         # try to execute query on flaky lofar mysql connection
@@ -125,14 +150,16 @@ class MoMDatabaseWrapper:
 
         for i in range(maxtries):
             try:
-                self._connect()
-                cursor = self.conn.cursor(dictionary=True)
-                cursor.execute(query, data)
-                return cursor.fetchall()
+                self.connect()
+                self.cursor.execute(query, data)
+                return self.cursor.fetchall()
             except (OperationalError, AttributeError) as e:
                 logger.error(str(e))
 
-                if i+1 == maxtries: raise e
+                if i+1 == maxtries:
+                    raise e
+
+                self.disconnect()
 
     def _executeInsertQuery(self, query, data=None):
         # try to execute query on flaky lofar mysql connection
@@ -143,15 +170,17 @@ class MoMDatabaseWrapper:
 
         for i in range(maxtries):
             try:
-                self._connect()
-                cursor = self.conn.cursor(dictionary=True)
-                cursor.execute(query, data)
+                self.connect()
+                self.cursor.execute(query, data)
                 self.conn.commit()
-                return cursor.lastrowid
+                return self.cursor.lastrowid
             except (OperationalError, AttributeError) as e:
                 logger.error(str(e))
 
-                if i+1 == maxtries: raise e
+                if i+1 == maxtries:
+                    raise e
+
+                self.disconnect()
 
     def _executeUpdateQuery(self, query, data=None):
         # try to execute query on flaky lofar mysql connection
@@ -162,16 +191,17 @@ class MoMDatabaseWrapper:
 
         for i in range(maxtries):
             try:
-                self._connect()
-                cursor = self.conn.cursor(dictionary=True)
-                cursor.execute(query, data)
-                rowcount = cursor.rowcount
+                self.connect()
+                self.cursor.execute(query, data)
+                rowcount = self.cursor.rowcount
                 self.conn.commit()
                 return rowcount
             except (OperationalError, AttributeError) as e:
                 logger.error(str(e))
 
-                if i + 1 == maxtries: raise e
+                if i + 1 == maxtries:
+                    raise e
+                self.disconnect()
 
     def add_trigger(self, user_name, host_name, project_name, meta_data):
         logger.info("add_trigger for user_name: %s, host_name: %s, project_name: %s, meta_data: %s",
@@ -1194,7 +1224,8 @@ where project.mom2id = %s and (project_role.name = "Pi" or project_role.name = "
         result = {"minStartTime": None, "minDuration": None, "maxDuration": None, "maxEndTime": None, "trigger_id": None}
         misc = self._get_misc_contents(mom_id)
         if misc is None:
-            raise ValueError("mom_id (%s) not found in MoM database" % mom_id)
+            logger.info("no misc contents found for mom_id %s db", mom_id)
+            return result
 
         if "trigger_id" in misc:
             result["trigger_id"] = misc['trigger_id']
@@ -1224,10 +1255,10 @@ where project.mom2id = %s and (project_role.name = "Pi" or project_role.name = "
 
         misc = self._get_misc_contents(mom_id)
         if misc is None:
-            raise ValueError("mom_id (%s) not found in MoM database" % mom_id)
-        if 'stationSelection' not in misc:
-            raise ValueError("misc field for mom_id (%s) does not contain stationSelection" % mom_id)
-        station_selection = misc['stationSelection']
+            logger.info("no misc contents found for mom_id %s db", mom_id)
+            return None
+
+        station_selection = misc.get('stationSelection')
 
         logger.info("get_station_selection for mom_id (%s): %s", mom_id, station_selection)
 
@@ -1244,7 +1275,8 @@ where project.mom2id = %s and (project_role.name = "Pi" or project_role.name = "
 
         misc = self._get_misc_contents(mom_id)
         if misc is None:
-            raise ValueError("mom_id (%s) not found in MoM database" % mom_id)
+            logger.info("no misc contents found for mom_id %s db", mom_id)
+            return None
         storagemanager = misc.get('storagemanager')
 
         logger.info("get_storagemanager for mom_id (%s): %s", mom_id, storagemanager)
@@ -1296,6 +1328,9 @@ class ProjectDetailsQueryHandler(MessageHandlerInterface):
     def prepare_loop(self):
         self.momdb = MoMDatabaseWrapper(self.dbcreds)
 
+    def finalize_loop(self):
+        self.momdb.disconnect()
+
     def add_trigger(self, user_name, host_name, project_name, meta_data):
         row_id = self.momdb.add_trigger(user_name, host_name, project_name, meta_data)
         self.momdb.update_trigger_quota(project_name)
@@ -1432,7 +1467,7 @@ def createService(busname=DEFAULT_MOMQUERY_BUSNAME,
     return Service(servicename,
                    handler,
                    busname=busname,
-                   numthreads=8,
+                   numthreads=1,
                    use_service_methods=True,
                    verbose=False,
                    broker=broker,
@@ -1443,6 +1478,7 @@ def main():
     """
     Starts the momqueryservice.GetObjectDetails service
     """
+    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.INFO)
 
     # Check the invocation arguments
     parser = OptionParser("%prog [options]",
@@ -1469,5 +1505,4 @@ def main():
         waitForInterrupt()
 
 if __name__ == '__main__':
-    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.INFO)
     main()
diff --git a/SAS/MoM/MoMQueryService/test/t_momqueryservice.py b/SAS/MoM/MoMQueryService/test/t_momqueryservice.py
index 43f6e29912081f38bcfc580f316916378916dd4a..14d005fd57d2f924117eeb1ac8391d34d1fc6ef6 100755
--- a/SAS/MoM/MoMQueryService/test/t_momqueryservice.py
+++ b/SAS/MoM/MoMQueryService/test/t_momqueryservice.py
@@ -1636,19 +1636,17 @@ class TestMoMDatabaseWrapper(unittest.TestCase):
         result = self.mom_database_wrapper.get_station_selection(self.mom_id)
         self.assertEqual(result, expected_result)
 
-    def test_get_station_selection_throws_ValueError_on_empty_query_result(self):
+    def test_get_station_selection_on_empty_query_result(self):
         self.mysql_mock.connect().cursor().fetchall.return_value = []
 
-        with self.assertRaises(ValueError):
-            self.mom_database_wrapper.get_station_selection(1234)
+        self.assertIsNone(self.mom_database_wrapper.get_station_selection(1234))
 
-    def test_get_station_selection_throws_ValueError_if_station_selection_not_present_in_misc(self):
+    def test_get_station_selection_if_station_selection_not_present_in_misc(self):
         details_result = [{"mom2id": self.mom_id, "mom2objecttype": self.job_type,
                            "misc": json.dumps({"timeWindow": {'minDuration': 300, 'maxDuration': 300}})}]
         self.mysql_mock.connect().cursor().fetchall.return_value = details_result
 
-        with self.assertRaises(ValueError):
-            self.mom_database_wrapper.get_station_selection(1234)
+        self.assertIsNone(self.mom_database_wrapper.get_station_selection(1234))
 
     def test_get_time_restrictions_returns_misc_field_info_from_query_result(self):
         min_start_time = "2017-01-01T12:00:00"
@@ -1685,11 +1683,15 @@ class TestMoMDatabaseWrapper(unittest.TestCase):
         self.assertEqual(result['minDuration'], None)
         self.assertEqual(result['maxDuration'], None)
 
-    def test_get_time_restrictions_throws_ValueError_on_empty_query_result(self):
+    def test_get_time_restrictions_on_empty_query_result(self):
         self.mysql_mock.connect().cursor().fetchall.return_value = []
 
-        with self.assertRaises(ValueError):
-            self.mom_database_wrapper.get_trigger_time_restrictions(1234)
+        expected_sane_default = {'minStartTime': None,
+                                 'minDuration': None,
+                                 'maxDuration': None,
+                                 'maxEndTime': None,
+                                 'trigger_id': None}
+        self.assertEqual(expected_sane_default, self.mom_database_wrapper.get_trigger_time_restrictions(1234))
 
     def test_get_time_restrictions_throws_NotImplementedError_when_misc_has_timeWindow_but_no_trigger_id(self):
         min_start_time = "2017-01-01T12:00:00"
@@ -1761,16 +1763,15 @@ class TestMoMDatabaseWrapper(unittest.TestCase):
         result = self.mom_database_wrapper.get_storagemanager(self.mom_id)
         self.assertEqual(result, value)
 
-    def test_get_storagemanager_throws_ValueError_on_empty_query_result(self):
+    def test_get_storagemanager_on_empty_query_result(self):
         self.mysql_mock.connect().cursor().fetchall.return_value = []
-        with self.assertRaises(ValueError):
-            self.mom_database_wrapper.get_storagemanager(1234)
+        self.assertIsNone(self.mom_database_wrapper.get_storagemanager(1234))
 
-    def test_get_storagemanager_if_station_selection_not_present_in_misc(self):
+    def test_get_storagemanager_returns_None_if_station_selection_not_present_in_misc(self):
         details_result = [{"misc": json.dumps({"timeWindow": {'minDuration': 300, 'maxDuration': 300}})}]
         self.mysql_mock.connect().cursor().fetchall.return_value = details_result
 
-        self.assertEqual(None, self.mom_database_wrapper.get_storagemanager(1234))
+        self.assertIsNone(self.mom_database_wrapper.get_storagemanager(1234))
 
 @unittest.skip("Skipping integration test")
 class IntegrationTestMoMDatabaseWrapper(unittest.TestCase):
diff --git a/SAS/OTB/OTB/dist-src/OTB b/SAS/OTB/OTB/dist-src/OTB
index 548d04a19aba7075f9444a808f787841c17d7b3d..4d089a8dcba6ef808930449f3f6c384127753ad7 100755
--- a/SAS/OTB/OTB/dist-src/OTB
+++ b/SAS/OTB/OTB/dist-src/OTB
@@ -163,7 +163,7 @@ ps -ef | grep -v grep | grep java | grep server | grep $port1 2>&1 1>/dev/null
 if [ $? -ne 0 ]; then
     # use dbcredentials.py to extract OTDB credentials in shell form
     source /opt/lofar/lofarinit.sh
-    declare `dbcredentials -D jOTDBServer -S`
+    declare `dbcredentials -D jOTDBserver -S`
 
     $JAVA_HOME/bin/java -cp $CLASSPATH nl.astron.lofar.sas.otb.jotdb3.jOTDBserver -s `hostname` -d $DBHOST -P $DBPORT -p $port1 -o $port2 -U $DBUSER -W "$DBPASSWORD" 2>&1 1>&$logfile &
     serverpid=$!
diff --git a/SAS/OTB/jOTDB3/dist-src/jOTDB3.log_prop b/SAS/OTB/jOTDB3/dist-src/jOTDB3.log_prop
index bbcdaaf80ffcbb41a101addaedb366dedaa3790c..2b8b02c46119e1e6256d7f9643446abc262cc8af 100644
--- a/SAS/OTB/jOTDB3/dist-src/jOTDB3.log_prop
+++ b/SAS/OTB/jOTDB3/dist-src/jOTDB3.log_prop
@@ -9,7 +9,7 @@ log4j.appender.STDOUT.layout.ConversionPattern=%d [%t] %-5p (%F:%L) %m%n
 
 # Define the Java FILE appender
 log4j.appender.FILE=org.apache.log4j.RollingFileAppender
-log4j.appender.FILE.File=/opt/sas/log/jOTDB3.log
+log4j.appender.FILE.File=/opt/lofar/var/log/jOTDB3.log
 log4j.appender.FILE.MaxFileSize=10MB
 log4j.appender.FILE.MaxBackupIndex=10
 log4j.appender.FILE.layout=org.apache.log4j.PatternLayout
@@ -29,7 +29,7 @@ log4cplus.appender.STDOUT.ImmediateFlush=true
 
 # Define the ROLFILE appender
 log4cplus.appender.ROLFILE=log4cplus::RollingFileAppender
-log4cplus.appender.ROLFILE.File=/opt/sas/log/jOTDB3-C++.log
+log4cplus.appender.ROLFILE.File=/opt/lofar/var/log/jOTDB3-C++.log
 log4cplus.appender.ROLFILE.MaxFileSize=10MB
 log4cplus.appender.ROLFILE.MaxBackupIndex=10
 log4cplus.appender.ROLFILE.layout=log4cplus::PatternLayout
diff --git a/SAS/OTDB_Services/TreeStatusEvents.py b/SAS/OTDB_Services/TreeStatusEvents.py
index e9d6177de0696f5a6886484de9712880079f1cde..801b303d85d1145426a3a9aa788328ad55064117 100755
--- a/SAS/OTDB_Services/TreeStatusEvents.py
+++ b/SAS/OTDB_Services/TreeStatusEvents.py
@@ -157,7 +157,7 @@ def create_service(busname, dbcreds):
                         logger.info("sending message treeid %s state %s modtime %s" % (treeid, allowed_states.get(state, "unknown_state"), modtime))
                         send_bus.send(msg)
 
-                        logger.info("new start_time:=%s" % (creation,))
+                        logger.debug("new start_time:=%s" % (creation,))
 
                         try:
                             with open(treestatuseventfilename, 'w') as f:
@@ -175,4 +175,4 @@ def create_service(busname, dbcreds):
                 time.sleep(2)
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/SAS/ResourceAssignment/RAScripts/povero b/SAS/ResourceAssignment/RAScripts/povero
index 2df86825331a727df09e43b51b500cf9bd17e7b9..9201e1e6f67580abc12dc69deb4db7a8166d5de5 100755
--- a/SAS/ResourceAssignment/RAScripts/povero
+++ b/SAS/ResourceAssignment/RAScripts/povero
@@ -29,6 +29,7 @@ import logging
 import subprocess
 
 from lofar.parameterset import parameterset
+from lofar.common.subprocess_utils import communicate_returning_strings
 
 from lofar.sas.resourceassignment.resourceassignmentservice.rpc import RARPC
 from lofar.sas.resourceassignment.resourceassignmentservice.config import DEFAULT_BUSNAME as DEFAULT_RADB_BUSNAME
@@ -41,10 +42,10 @@ from lofar.messaging import setQpidLogLevel
 logger = logging.getLogger(__name__)
 
 def getSlurmStats(otdb_id):
-    cmd = ['ssh', 'lofarsys@head01.cep4.control.lofar', 'sacct', '-o', 'jobid,cputimeraw,nnodes', '--name=%s' % otdb_id, '-S', '2016-01-01', '-X', '--parsable2', '-n']
+    cmd = ['ssh', 'lofarsys@head.cep4.control.lofar', 'sacct', '-o', 'jobid,cputimeraw,nnodes', '--name=%s' % otdb_id, '-S', '2016-01-01', '-X', '--parsable2', '-n']
     logger.debug(' '.join(cmd))
     proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    out, err = proc.communicate()
+    out, err = communicate_returning_strings(proc)
 
     if proc.returncode == 0:
         try:
diff --git a/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py b/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py
index a0116e74553c54fa1382ea80524993ccdae29df5..abb5ae76cd9a2108269a03713632516be579505a 100755
--- a/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py
+++ b/SAS/ResourceAssignment/RAtoOTDBTaskSpecificationPropagator/lib/translator.py
@@ -227,11 +227,11 @@ class RAtoOTDBTranslator():
                 # The tab nr dim must be and remain the outer dim, even though it's sliced from the parts dim.
                 # NOTE: nr_cs_stokes must be the same in all SAPs with CS TABs, see reordering code below.
                 nr_cs_stokes = sap['properties']['nr_of_cs_stokes']  # the 'cs_stokes' term here can also mean cv XXYY
-                nr_parts = sap['properties']['nr_of_cs_files'] / nr_cs_stokes  # in this prop's claim!
+                nr_parts = int(sap['properties']['nr_of_cs_files'] / nr_cs_stokes)  # in this prop's claim!
                 nparts_tab = nr_parts_per_tab_per_sap[sap_nr]  # alias for readability; this is also per stokes
                 while nr_parts > 0:
-                    tab_nr      = next_tab_part_nrs_per_sap[sap_nr] / nparts_tab
-                    tab_part_nr = next_tab_part_nrs_per_sap[sap_nr] % nparts_tab
+                    tab_nr      = int(next_tab_part_nrs_per_sap[sap_nr] / nparts_tab)
+                    tab_part_nr = int(next_tab_part_nrs_per_sap[sap_nr] % nparts_tab)
                     nparts_remain = min(nr_parts, nparts_tab - tab_part_nr)  # nr parts left before we go to the next tab
 
                     if is_tab_nrs_per_sap[sap_nr] != -1 and tab_nr >= is_tab_nrs_per_sap[sap_nr]:
@@ -262,7 +262,7 @@ class RAtoOTDBTranslator():
             filenames2_per_sap[sap_nr] = [None] * len(filenames_per_sap[sap_nr])
 
             nr_parts = nr_parts_per_tab_per_sap[sap_nr]
-            nr_tabs = len(locations_per_sap[sap_nr]) / (nr_cs_stokes * nr_parts)
+            nr_tabs = int(len(locations_per_sap[sap_nr]) / (nr_cs_stokes * nr_parts))
             for tab_nr in range(nr_tabs):
                 for part_nr in range(nr_parts):
                     for stokes_nr in range(nr_cs_stokes):
@@ -319,8 +319,8 @@ class RAtoOTDBTranslator():
                 # which is parts within stokes. We don't yet know the total nr of files.
                 # First, do stokes within parts, then later reorder when we have all names.
                 # NOTE: nr_is_stokes must be the same in all SAPs with an IS TAB, see reordering code below.
-                nr_is_stokes = sap['properties']['nr_of_is_stokes']
-                nr_parts = sap['properties']['nr_of_is_files'] / nr_is_stokes  # in this prop's claim!
+                nr_is_stokes = int(sap['properties']['nr_of_is_stokes'])
+                nr_parts = int(sap['properties']['nr_of_is_files'] / nr_is_stokes)  # in this prop's claim!
                 next_part_nr = next_tab_part_nrs_per_sap[sap_nr]
                 for part_nr in range(next_part_nr, next_part_nr + nr_parts):
                     for stokes_nr in range(nr_is_stokes):
diff --git a/SAS/ResourceAssignment/ResourceAssigner/lib/resource_assigner.py b/SAS/ResourceAssignment/ResourceAssigner/lib/resource_assigner.py
index 955890d8ad9f9ac42142a29a1fe5139f91b7281b..7d873bad86a63e71be33d657780150e73af6135e 100755
--- a/SAS/ResourceAssignment/ResourceAssigner/lib/resource_assigner.py
+++ b/SAS/ResourceAssignment/ResourceAssigner/lib/resource_assigner.py
@@ -116,12 +116,12 @@ class ResourceAssigner(object):
         :param radb_dbcreds: the credentials to be used for accessing the RADB (default: None, which means default)
         """
 
-        self.radbrpc = RARPC(servicename=radb_servicename, busname=radb_busname, broker=broker, timeout=180)
-        self.rerpc = RPC(re_servicename, busname=re_busname, broker=broker, ForwardExceptions=True, timeout=180)
-        self.otdbrpc = OTDBRPC(busname=otdb_busname, servicename=otdb_servicename, broker=broker, timeout=180)
-        self.momrpc = MoMQueryRPC(servicename=mom_servicename, busname=mom_busname, broker=broker, timeout=180)
-        self.sqrpc = StorageQueryRPC(busname=storagequery_busname, servicename=storagequery_servicename, broker=broker)
-        self.curpc = CleanupRPC(busname=cleanup_busname, servicename=cleanup_servicename, broker=broker)
+        self.radbrpc = RARPC(servicename=radb_servicename, busname=radb_busname, broker=broker, timeout=30)
+        self.rerpc = RPC(re_servicename, busname=re_busname, broker=broker, ForwardExceptions=True, timeout=30)
+        self.otdbrpc = OTDBRPC(busname=otdb_busname, servicename=otdb_servicename, broker=broker, timeout=30)
+        self.momrpc = MoMQueryRPC(servicename=mom_servicename, busname=mom_busname, broker=broker, timeout=30)
+        self.sqrpc = StorageQueryRPC(busname=storagequery_busname, servicename=storagequery_servicename, broker=broker, timeout=30)
+        self.curpc = CleanupRPC(busname=cleanup_busname, servicename=cleanup_servicename, broker=broker, timeout=120)
         self.ra_notification_bus = ToBus(address=ra_notification_busname, broker=broker)
         self.ra_notification_prefix = ra_notification_prefix
         self.obscontrol = ObservationControlRPCClient(busname=observation_control_busname,
diff --git a/SAS/ResourceAssignment/ResourceAssigner/lib/schedulechecker.py b/SAS/ResourceAssignment/ResourceAssigner/lib/schedulechecker.py
index 41e179056efbf87e8c0bd6932cabce601345f477..581d65b92d80bce112b288ce1b67bbaf577aab5a 100644
--- a/SAS/ResourceAssignment/ResourceAssigner/lib/schedulechecker.py
+++ b/SAS/ResourceAssignment/ResourceAssigner/lib/schedulechecker.py
@@ -200,7 +200,9 @@ class ScheduleChecker():
         """
         try:
             logger.info('checking unfinished tasks for status in mom')
-            unrun_tasks = self._radbrpc.getTasks(task_status=['approved', 'scheduled', 'prescheduled', 'queued', 'error', 'aborted'], task_type=['observation', 'pipeline'])
+            unrun_tasks = self._radbrpc.getTasks(task_status=['approved', 'scheduled', 'prescheduled', 'queued', 'error', 'aborted'],
+                                                 task_type=['observation', 'pipeline'],
+                                                 lower_bound=datetime.utcnow() - timedelta(minutes=30))
             mom_ids = [t['mom_id'] for t in unrun_tasks]
             mom_details = self._momrpc.getObjectDetails(mom_ids)
 
@@ -210,26 +212,26 @@ class ScheduleChecker():
                     mom_status = mom_details[mom_id].get('object_status') if mom_id in mom_details else None
                     if (mom_id not in mom_details or
                         mom_status in ['opened', 'described', 'suspended']):
-                        logger.info('task %s mom_id=%s otdb_id=%s has radb_status=%s and mom_status=%s => removing task from radb',
+                        logger.warning('task %s mom_id=%s otdb_id=%s has radb_status=%s and mom_status=%s => Would normally remove task from radb, ignoring for now',
                                     task['id'],
                                     task['mom_id'],
                                     task['otdb_id'],
                                     task['status'],
                                     mom_status)
 
-                        if mom_status in ['opened', 'described']:
-                            # auto delete data for tasks which went back to opened in mom (for pipeline restarts for example)
-                            # The reason to delete it here is because otherwise the cleanupservice tries to get it's info from an already deleted task in radb/otdb
-                            path_result = self._curpc.getPathForOTDBId(task['otdb_id'])
-                            if path_result['found']:
-                                logger.info("removing data on disk from previous run for otdb_id %s", task['otdb_id'])
-                                result = self._curpc.removeTaskData(task['otdb_id'])
+                        #if mom_status in ['opened', 'described']:
+                            ## auto delete data for tasks which went back to opened in mom (for pipeline restarts for example)
+                            ## The reason to delete it here is because otherwise the cleanupservice tries to get it's info from an already deleted task in radb/otdb
+                            #path_result = self._curpc.getPathForOTDBId(task['otdb_id'])
+                            #if path_result['found']:
+                                #logger.info("removing data on disk from previous run for otdb_id %s", task['otdb_id'])
+                                #result = self._curpc.removeTaskData(task['otdb_id'])
 
-                                if not result['deleted']:
-                                    logger.warning("could not remove all data on disk from previous run for otdb_id %s: %s", task['otdb_id'], result['message'])
+                                #if not result['deleted']:
+                                    #logger.warning("could not remove all data on disk from previous run for otdb_id %s: %s", task['otdb_id'], result['message'])
 
-                        # delete the spec (and task/claims etc via cascading delete) from radb to get it in sync again with mom
-                        self._radbrpc.deleteSpecification(task['specification_id'])
+                        ## delete the spec (and task/claims etc via cascading delete) from radb to get it in sync again with mom
+                        #self._radbrpc.deleteSpecification(task['specification_id'])
                 except Exception as e:
                     logger.error("Error while checking unrun task mom_id=%s otdb_id=%s radb_id=%s for MoM opened/described/suspended status: %s",
                                  task['mom_id'],
diff --git a/SAS/XML_generator/test/test_regression.py b/SAS/XML_generator/test/test_regression.py
index 557f0fb6c971d953e9a0ee58801cc24d32763c2c..bb27372cbd7ebda36bad1851fcf09b181e8398de 100755
--- a/SAS/XML_generator/test/test_regression.py
+++ b/SAS/XML_generator/test/test_regression.py
@@ -1,6 +1,21 @@
 #!/usr/bin/env python3
 import sys, os, subprocess, difflib, shutil
 
+# Copied from LCS/PyCommon/subprocess_utils
+def _convert_bytes_tuple_to_strings(bytes_tuple):
+    """Helper function for subprocess.communicate() and/or subprocess.check_output which changed from python2 to python3.
+    This function converts the bytes in the bytes_tuple to utf-8 strings.
+    You can use this to get the "normal" python2 subprocess behaviour back for functions like check_output and/or communicate."""
+    return tuple('' if x is None
+                 else x.decode('UTF-8') if isinstance(x, bytes)
+                 else x
+                 for x in bytes_tuple)
+
+def communicate_returning_strings(proc, input=None):
+    """Helper function for subprocess.communicate() which changed from python2 to python3.
+    This function waits for the subprocess to finish and returns the stdout and stderr as utf-8 strings, just like python2 did."""
+    return _convert_bytes_tuple_to_strings(proc.communicate(input=input))
+
 
 # diff should only be something like:
 # 3,5c3,5
@@ -34,7 +49,7 @@ def main(verbose_tests=False, regenerate_golden_output=False):
         print("*** Processing %s ***" % infile)
         cmd = ["xmlgen", "-i", "./txt/%s" % infile, "-o", "test.xml"]
         p = subprocess.Popen(cmd, stdin=open('/dev/null'), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        out, err = p.communicate()
+        out, err = communicate_returning_strings(p)
         if verbose_tests and p.returncode == 1:
             print(out)
             print(err)
@@ -50,7 +65,7 @@ def main(verbose_tests=False, regenerate_golden_output=False):
             ## -w ignores differences in whitespace
             ## -I '^[[:space:]]*$' because -B doesn't work for blank lines (on OSX?)
             p = subprocess.Popen(cmd, stdin=open('/dev/null'), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-            logs = p.communicate()
+            logs = communicate_returning_strings(p)
             diffs = logs[0].splitlines()  # stdout
             print("diff reply was %i lines long" % len(diffs))
             check = checkDiff(diffs) and len(logs[1]) == 0
diff --git a/SubSystems/SAS_OTDB/CMakeLists.txt b/SubSystems/SAS_OTDB/CMakeLists.txt
index 4a7756ce69b62d257a03e30114319e55c0a48856..beb06dd928c53f4b198ed1de55ee42754e195ac3 100644
--- a/SubSystems/SAS_OTDB/CMakeLists.txt
+++ b/SubSystems/SAS_OTDB/CMakeLists.txt
@@ -1,4 +1,5 @@
 # $Id$
 
+# PyCommon is needed for OTB which is installed on the same server
 lofar_package(SAS_OTDB 
-  DEPENDS Deployment OTDB_Comps OTDB_SQL StaticMetaData WinCC_Datapoints)
+  DEPENDS Deployment OTDB_Comps OTDB_SQL StaticMetaData WinCC_Datapoints PyCommon)