diff --git a/tangostationcontrol/tangostationcontrol/clients/tcp_replicator.py b/tangostationcontrol/tangostationcontrol/clients/tcp_replicator.py index 9feb04c7b9077ed8efc35d06bcaa4216ca0d951e..f49e7fce2ef814e5a44fc4d58b40e5534d4271cd 100644 --- a/tangostationcontrol/tangostationcontrol/clients/tcp_replicator.py +++ b/tangostationcontrol/tangostationcontrol/clients/tcp_replicator.py @@ -4,6 +4,7 @@ from threading import Semaphore from threading import Thread import asyncio import logging +import sys from tangostationcontrol.clients.statistics_client_thread import StatisticsClientThread @@ -351,4 +352,25 @@ class TCPReplicator(Thread, StatisticsClientThread): def nof_tasks_pending(self): """ Return the number of pending tasks in our event loop. """ - return len(asyncio.all_tasks(self._loop)) + # asyncio.all_tasks is not thread safe, and can fail on a race condition if another + # thread adds a task to a different loop while we're in all_tasks(). + # We thus occasionally need to retry. We try a limited number of times to avoid + # infinite loops. + # + # See https://bugs.python.org/issue36607 and https://support.astron.nl/jira/browse/L2SS-560 + # + # This is fixed (in a similar manner as here) in python 3.7.4+, see + # https://github.com/python/cpython/blob/v3.7.3/Lib/asyncio/tasks.py#L34 + # versus + # https://github.com/python/cpython/blob/v3.7.4/Lib/asyncio/tasks.py#L34 + if sys.version_info >= (3,7,4): + return asyncio.all_tasks(self._loop) + else: + for i in range(100,0,-1): + try: + return len(asyncio.all_tasks(self._loop)) + except RuntimeError as e: + if i == 1: + # ran out of tries, and we want to expose the original exception + raise +