diff --git a/README.md b/README.md index 7bc350f5a1d684e7b163cd4b59b76bf587b4b28e..a6ff51ff9532d71f1e1b675e481d7a05caedf724 100644 --- a/README.md +++ b/README.md @@ -151,6 +151,7 @@ Next change the version in the following places: # Release Notes +* 0.42.6 Fix crash caused by emitting change events for attributes polled by Tango * 0.42.5 Add additional features to protection control * 0.42.4 Add integration test fixture that routinely tests against cross test dependencies * 0.42.3 Use PyTango 10.0.0rc3 to reduce memory leaks diff --git a/tangostationcontrol/VERSION b/tangostationcontrol/VERSION index 56754720a518bb276eef78834b2bb8f7bf9cd28c..e4e9a42eb61a0200e9e386700de77c7f438fcf29 100644 --- a/tangostationcontrol/VERSION +++ b/tangostationcontrol/VERSION @@ -1 +1 @@ -0.42.5 +0.42.6 diff --git a/tangostationcontrol/tangostationcontrol/common/events/change_events.py b/tangostationcontrol/tangostationcontrol/common/events/change_events.py index 60ce4bc0d8eb203b25a3cff6dc8535d4d6e2d82c..00eb0a8a6087f4753912bf2b71e41ece5ca861d1 100644 --- a/tangostationcontrol/tangostationcontrol/common/events/change_events.py +++ b/tangostationcontrol/tangostationcontrol/common/events/change_events.py @@ -62,5 +62,18 @@ class ChangeEvents: # self.device.push_change_event(attr_name, value, time.now(), AttrQuality.ATTR_INVALID, 0, 0) return + if ( + self.device.is_attribute_polled(attr_name) + and self.device.get_attribute_poll_period(attr_name) > 0 + ): + # Emitting an event now can cause a segfault in devices using the Asyncio green mode, + # if the Tango polling thread is accessing the same attribute we push an event for. + # + # See https://gitlab.com/tango-controls/cppTango/-/merge_requests/1316 + # and https://gitlab.com/tango-controls/pytango/-/merge_requests/729 + raise RuntimeError( + f"Cannot send change event for attribute {attr_name} of device {self.device} as it is already polled by Tango. Calling push_change_event can cause a Segmentation Fault." + ) + # emit any change self.device.push_change_event(attr_name, value) diff --git a/tangostationcontrol/tangostationcontrol/common/events/subscriptions.py b/tangostationcontrol/tangostationcontrol/common/events/subscriptions.py index f6fe413ab271e8365edf34180993699de64c4147..d43049ff77867317151e2e6b57f105bafd99751c 100644 --- a/tangostationcontrol/tangostationcontrol/common/events/subscriptions.py +++ b/tangostationcontrol/tangostationcontrol/common/events/subscriptions.py @@ -11,7 +11,6 @@ from prometheus_client import Counter from tangostationcontrol.common.lofar_logging import exception_to_str from tangostationcontrol.common.constants import ( - DEFAULT_POLLING_PERIOD_MS, DEFAULT_METRICS_POLLING_PERIOD_MS, ) from lofar_station_client.common import CaseInsensitiveString @@ -129,7 +128,8 @@ class EventSubscriptions: proxy: DeviceProxy, attr_name: str, callback: EventCallbackType, - period: int = DEFAULT_POLLING_PERIOD_MS, + # a friendly default rate that we tend to poll at anyway + period: int = DEFAULT_METRICS_POLLING_PERIOD_MS, ): """Subscribe to changes to an attribute of another device. Immediately and on change, the provided callback will be called as diff --git a/tangostationcontrol/tangostationcontrol/devices/base_device_classes/lofar_device.py b/tangostationcontrol/tangostationcontrol/devices/base_device_classes/lofar_device.py index ca93d23c35613cdfb654a26168757631c075750b..4ba72ceef770ecc53279c265975acf5ba3cd735a 100644 --- a/tangostationcontrol/tangostationcontrol/devices/base_device_classes/lofar_device.py +++ b/tangostationcontrol/tangostationcontrol/devices/base_device_classes/lofar_device.py @@ -192,11 +192,23 @@ class AttributePoller: # by Prometheus not to exist, leading to gaps in the metric # even if it functions correctly. - for attr_name, attr_data in self._poll_list.items(): + for attr_name, attr_data in list(self._poll_list.items()): # stop polling if we turned to OFF/FAULT during this loop if not self.polling_allowed(): return + if self.device.is_attribute_polled(attr_name): + # ChangeEvents.send_change_event is not allowed for attributes already pollled + # by Tango. Also, because they're already polled, Tango will emit the change + # events for us. So we simply filter those attributes out. + logger.info( + f"Not polling {attr_name} anymore as it is already polled by Tango." + ) + + # We don't want to report the above every second. Remove this attribute from the list + del self._poll_list[attr_name] + continue + value = await self._read_attribute_nothrow(attr_name) # stop polling if we turned to OFF/FAULT during this loop diff --git a/tangostationcontrol/test/common/events/test_change_events.py b/tangostationcontrol/test/common/events/test_change_events.py index 03d6d9386d6c7d61424622b85771c6b4fd5e3267..25caf8a5d2c9946fa183677c58b4edcdb5d0ca6c 100644 --- a/tangostationcontrol/test/common/events/test_change_events.py +++ b/tangostationcontrol/test/common/events/test_change_events.py @@ -8,6 +8,7 @@ from tangostationcontrol.common.events import ( from tango.server import Device, attribute, AttrWriteType, command from tango.test_context import DeviceTestContext +from tango import DevFailed from test import base @@ -112,3 +113,25 @@ class TestChangeEvents(EventSubscriptionMixin, base.TestCase): # this should not result in a change event self.wait_for_no_callback() + + def test_no_change_event_for_polled_attributes(self): + """Test whether we allow push_change_event for attributes polled by Tango.""" + + class PolledTestDevice(self.TestDevice): + @attribute(dtype=int, polling_period=1000) + def polled_attr(self): + return 42 + + def init_device(self): + super().init_device() + + self.custom_change_events.configure_attribute("polled_attr") + + @command() + def generate_change_event(self): + self.custom_change_events.send_change_event("polled_attr", 42) + + with DeviceTestContext(PolledTestDevice, properties={}, process=True) as proxy: + # try to send a change event + with self.assertRaises(DevFailed): + proxy.generate_change_event() diff --git a/tangostationcontrol/test/devices/base_device_classes/test_async_device.py b/tangostationcontrol/test/devices/base_device_classes/test_async_device.py index 1438e9060a14637ad3a145e4cc2549aeaec0a4f6..8085a70a52e3c85918af9afd8caeb7e3a36a42a2 100644 --- a/tangostationcontrol/test/devices/base_device_classes/test_async_device.py +++ b/tangostationcontrol/test/devices/base_device_classes/test_async_device.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import numpy +import time from tango.server import attribute from tango.server import device_property from tango import DevSource @@ -9,6 +10,7 @@ from tango import DevSource from tango.test_context import DeviceTestContext from tangostationcontrol.devices.base_device_classes import async_device +from tangostationcontrol.devices.base_device_classes import lofar_device from test.devices.base_device_classes.test_lofar_device import TestLofarDevice @@ -57,6 +59,42 @@ class TestAsyncDevice(TestLofarDevice): ) as proxy: self.assertEqual(proxy.example_attribute, self.property_length) + def test_poll_attribute_crash(self): + """Running Tango's own poll thread concurrent with push_event (from our AttributePoller) + causes a segmentation fault. This test tries to force this race condition to happen. + """ + + lofar_device.DEFAULT_POLLING_PERIOD_MS = 50 + + class MyAsyncDevice(self.test_device): + async def init_device(self): + await super().init_device() + + self.poll_attribute("A", 5) + + self.attribute_poller.register("A") + + @attribute(dtype=float) + async def A(self): + time.sleep(0.01) + return 42.0 + + @attribute(dtype=int) + async def A_read_counter(self): + return self._A_read_counter + + with DeviceTestContext(MyAsyncDevice, process=False) as proxy: + # make sure we don't get a cached result if the + # poll_attribute was already called by Tango's polling loop. + proxy.set_source(DevSource.DEV) + + proxy.initialise() + proxy.on() + + # force poll + for _ in range(100): + proxy.poll_attributes() + def test_poll_attribute(self): """Test whether poll_attribute really polls registered attributes.""" diff --git a/tangostationcontrol/test/devices/base_device_classes/test_lofar_device.py b/tangostationcontrol/test/devices/base_device_classes/test_lofar_device.py index 7eceb2630836a0cc1b1b768a937fbcdf14b68fe7..16132eb67217f27273cfa6ecf600ae6803e789d1 100644 --- a/tangostationcontrol/test/devices/base_device_classes/test_lofar_device.py +++ b/tangostationcontrol/test/devices/base_device_classes/test_lofar_device.py @@ -36,6 +36,9 @@ class TestAttributePoller(IsolatedAsyncioTestCase): def is_attribute_access_allowed(self, _): return True + def is_attribute_polled(self, _): + return False + async def test_is_registered(self): """Does is_registered() reflect which attributes are registered for polling?"""