From da3a4aabb42d809b1bcea344b5d754a68ffd47cd Mon Sep 17 00:00:00 2001 From: Jan David Mol <mol@astron.nl> Date: Fri, 4 Dec 2020 09:54:57 +0100 Subject: [PATCH] Only reconnect once if connection fails -- we only want to catch exotic errors, not cover up hardware/infra failure --- RCUSCC/RCUSCC/RCUSCC.py | 58 +++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/RCUSCC/RCUSCC/RCUSCC.py b/RCUSCC/RCUSCC/RCUSCC.py index 73cf614ca..7655e4e7a 100644 --- a/RCUSCC/RCUSCC/RCUSCC.py +++ b/RCUSCC/RCUSCC/RCUSCC.py @@ -74,38 +74,34 @@ class OPCUAConnection(Thread): Run a connector function in the background, until it succeeds. """ - def __init__(self, client, init_func, fault_func, streams, try_interval=2): + def __init__(self, client, on_func, fault_func, streams, try_interval=2): super().__init__(daemon=True) self.client = client - self.init_func = init_func + self.on_func = on_func self.fault_func = fault_func self.try_interval = try_interval self.streams = streams self.stopping = False - - self.start() + self.connected = False def _servername(self): return self.client.server_url.geturl() - def try_connect(self): + def connect(self): try: self.streams.debug_stream("Connecting to server %s", self._servername()) self.client.connect() + self.connected = True self.streams.debug_stream("Connected to server. Initialising.") - - self.init_func() - return True except socket.error as e: self.streams.error_stream("Could not connect to server %s: %s", self._servername(), e) - - # signal that we're disconnected - self.fault_func() return False - def try_disconnect(self): + def disconnect(self): + self.connected = False # always force a reconnect, regardless of a successful disconnect + try: self.client.disconnect() except Exception as e: @@ -114,8 +110,14 @@ class OPCUAConnection(Thread): def run(self): while not self.stopping: # keep trying to connect - while not self.stopping and not self.try_connect(): - time.sleep(self.try_interval) + if not self.connected: + if self.connect(): + self.on_func() + else: + # we retry only once, to catch exotic network issues. if the infra or hardware is down, + # our device cannot help, and must be reinitialised after the infra or hardware is fixed. + self.fault_func() + return # keep checking if the connection is still alive try: @@ -126,7 +128,7 @@ class OPCUAConnection(Thread): self.streams.error_stream("Lost connection to server %s: %s", self._servername(), e) # technically, we may not have dropped the connection, but encounter a different error. so explicitly disconnect. - self.try_disconnect() + self.disconnect() # signal that we're disconnected self.fault_func() @@ -139,7 +141,7 @@ class OPCUAConnection(Thread): self.stopping = True self.join() - self.try_disconnect() + self.disconnect() class RCUSCC(Device): """ @@ -262,7 +264,7 @@ class RCUSCC(Device): return DummyNode() - def _init_opcua(self): + def _map_attributes(self): try: self.name_space_index = self.client.get_namespace_index("http://lofar.eu") except Exception as e: @@ -367,7 +369,21 @@ class RCUSCC(Device): self.client = opcua.Client("opc.tcp://{}:{}/".format(self.OPC_Server_Name, self.OPC_Server_Port), self.OPC_Time_Out) # timeout in seconds # Connect to OPC-UA -- will set ON state on success - self.opcua_connection = OPCUAConnection(self.client, self._init_opcua, self.Fault, self) + self.opcua_connection = OPCUAConnection(self.client, self.On, self.Fault, self) + + if not self.opcua_connection.connect(): + # hardware or infra is down -- needs fixing first + self.Fault() + return + + # Retrieve and map server attributes + self._map_attributes() + + # Start keep-alive + self.opcua_connection.start() + + # Everything went ok -- go online + self.On() # PROTECTED REGION END # // RCUSCC.init_device @@ -568,11 +584,15 @@ class RCUSCC(Device): :return:None """ + # Turn off self.set_state(DevState.OFF) - # stop reconnecting before disconnect + # Stop keep-alive self.opcua_connection.stop() + # Turn off again, in case of race conditions through reconnecting + self.set_state(DevState.OFF) + # PROTECTED REGION END # // RCUSCC.Off @command( -- GitLab