Skip to content
Snippets Groups Projects
Commit d768391b authored by Jan David Mol's avatar Jan David Mol
Browse files

L2SS-1987: Add test to check for race condition between push_change_event and Tango's polling loop

parent 2f4e64ab
No related branches found
No related tags found
1 merge request!981L2SS-1987: Add test to check for race condition between push_change_event and Tango's polling loop
...@@ -151,6 +151,7 @@ Next change the version in the following places: ...@@ -151,6 +151,7 @@ Next change the version in the following places:
# Release Notes # Release Notes
* 0.42.6 Fix crash caused by emitting change events for attributes polled by Tango
* 0.42.5 Add additional features to protection control * 0.42.5 Add additional features to protection control
* 0.42.4 Add integration test fixture that routinely tests against cross test dependencies * 0.42.4 Add integration test fixture that routinely tests against cross test dependencies
* 0.42.3 Use PyTango 10.0.0rc3 to reduce memory leaks * 0.42.3 Use PyTango 10.0.0rc3 to reduce memory leaks
......
0.42.5 0.42.6
...@@ -62,5 +62,18 @@ class ChangeEvents: ...@@ -62,5 +62,18 @@ class ChangeEvents:
# self.device.push_change_event(attr_name, value, time.now(), AttrQuality.ATTR_INVALID, 0, 0) # self.device.push_change_event(attr_name, value, time.now(), AttrQuality.ATTR_INVALID, 0, 0)
return return
if (
self.device.is_attribute_polled(attr_name)
and self.device.get_attribute_poll_period(attr_name) > 0
):
# Emitting an event now can cause a segfault in devices using the Asyncio green mode,
# if the Tango polling thread is accessing the same attribute we push an event for.
#
# See https://gitlab.com/tango-controls/cppTango/-/merge_requests/1316
# and https://gitlab.com/tango-controls/pytango/-/merge_requests/729
raise RuntimeError(
f"Cannot send change event for attribute {attr_name} of device {self.device} as it is already polled by Tango. Calling push_change_event can cause a Segmentation Fault."
)
# emit any change # emit any change
self.device.push_change_event(attr_name, value) self.device.push_change_event(attr_name, value)
...@@ -11,7 +11,6 @@ from prometheus_client import Counter ...@@ -11,7 +11,6 @@ from prometheus_client import Counter
from tangostationcontrol.common.lofar_logging import exception_to_str from tangostationcontrol.common.lofar_logging import exception_to_str
from tangostationcontrol.common.constants import ( from tangostationcontrol.common.constants import (
DEFAULT_POLLING_PERIOD_MS,
DEFAULT_METRICS_POLLING_PERIOD_MS, DEFAULT_METRICS_POLLING_PERIOD_MS,
) )
from lofar_station_client.common import CaseInsensitiveString from lofar_station_client.common import CaseInsensitiveString
...@@ -129,7 +128,8 @@ class EventSubscriptions: ...@@ -129,7 +128,8 @@ class EventSubscriptions:
proxy: DeviceProxy, proxy: DeviceProxy,
attr_name: str, attr_name: str,
callback: EventCallbackType, callback: EventCallbackType,
period: int = DEFAULT_POLLING_PERIOD_MS, # a friendly default rate that we tend to poll at anyway
period: int = DEFAULT_METRICS_POLLING_PERIOD_MS,
): ):
"""Subscribe to changes to an attribute of another device. """Subscribe to changes to an attribute of another device.
Immediately and on change, the provided callback will be called as Immediately and on change, the provided callback will be called as
......
...@@ -192,11 +192,23 @@ class AttributePoller: ...@@ -192,11 +192,23 @@ class AttributePoller:
# by Prometheus not to exist, leading to gaps in the metric # by Prometheus not to exist, leading to gaps in the metric
# even if it functions correctly. # even if it functions correctly.
for attr_name, attr_data in self._poll_list.items(): for attr_name, attr_data in list(self._poll_list.items()):
# stop polling if we turned to OFF/FAULT during this loop # stop polling if we turned to OFF/FAULT during this loop
if not self.polling_allowed(): if not self.polling_allowed():
return return
if self.device.is_attribute_polled(attr_name):
# ChangeEvents.send_change_event is not allowed for attributes already pollled
# by Tango. Also, because they're already polled, Tango will emit the change
# events for us. So we simply filter those attributes out.
logger.info(
f"Not polling {attr_name} anymore as it is already polled by Tango."
)
# We don't want to report the above every second. Remove this attribute from the list
del self._poll_list[attr_name]
continue
value = await self._read_attribute_nothrow(attr_name) value = await self._read_attribute_nothrow(attr_name)
# stop polling if we turned to OFF/FAULT during this loop # stop polling if we turned to OFF/FAULT during this loop
......
...@@ -8,6 +8,7 @@ from tangostationcontrol.common.events import ( ...@@ -8,6 +8,7 @@ from tangostationcontrol.common.events import (
from tango.server import Device, attribute, AttrWriteType, command from tango.server import Device, attribute, AttrWriteType, command
from tango.test_context import DeviceTestContext from tango.test_context import DeviceTestContext
from tango import DevFailed
from test import base from test import base
...@@ -112,3 +113,25 @@ class TestChangeEvents(EventSubscriptionMixin, base.TestCase): ...@@ -112,3 +113,25 @@ class TestChangeEvents(EventSubscriptionMixin, base.TestCase):
# this should not result in a change event # this should not result in a change event
self.wait_for_no_callback() self.wait_for_no_callback()
def test_no_change_event_for_polled_attributes(self):
"""Test whether we allow push_change_event for attributes polled by Tango."""
class PolledTestDevice(self.TestDevice):
@attribute(dtype=int, polling_period=1000)
def polled_attr(self):
return 42
def init_device(self):
super().init_device()
self.custom_change_events.configure_attribute("polled_attr")
@command()
def generate_change_event(self):
self.custom_change_events.send_change_event("polled_attr", 42)
with DeviceTestContext(PolledTestDevice, properties={}, process=True) as proxy:
# try to send a change event
with self.assertRaises(DevFailed):
proxy.generate_change_event()
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
import numpy import numpy
import time
from tango.server import attribute from tango.server import attribute
from tango.server import device_property from tango.server import device_property
from tango import DevSource from tango import DevSource
...@@ -9,6 +10,7 @@ from tango import DevSource ...@@ -9,6 +10,7 @@ from tango import DevSource
from tango.test_context import DeviceTestContext from tango.test_context import DeviceTestContext
from tangostationcontrol.devices.base_device_classes import async_device from tangostationcontrol.devices.base_device_classes import async_device
from tangostationcontrol.devices.base_device_classes import lofar_device
from test.devices.base_device_classes.test_lofar_device import TestLofarDevice from test.devices.base_device_classes.test_lofar_device import TestLofarDevice
...@@ -57,6 +59,42 @@ class TestAsyncDevice(TestLofarDevice): ...@@ -57,6 +59,42 @@ class TestAsyncDevice(TestLofarDevice):
) as proxy: ) as proxy:
self.assertEqual(proxy.example_attribute, self.property_length) self.assertEqual(proxy.example_attribute, self.property_length)
def test_poll_attribute_crash(self):
"""Running Tango's own poll thread concurrent with push_event (from our AttributePoller)
causes a segmentation fault. This test tries to force this race condition to happen.
"""
lofar_device.DEFAULT_POLLING_PERIOD_MS = 50
class MyAsyncDevice(self.test_device):
async def init_device(self):
await super().init_device()
self.poll_attribute("A", 5)
self.attribute_poller.register("A")
@attribute(dtype=float)
async def A(self):
time.sleep(0.01)
return 42.0
@attribute(dtype=int)
async def A_read_counter(self):
return self._A_read_counter
with DeviceTestContext(MyAsyncDevice, process=False) as proxy:
# make sure we don't get a cached result if the
# poll_attribute was already called by Tango's polling loop.
proxy.set_source(DevSource.DEV)
proxy.initialise()
proxy.on()
# force poll
for _ in range(100):
proxy.poll_attributes()
def test_poll_attribute(self): def test_poll_attribute(self):
"""Test whether poll_attribute really polls registered attributes.""" """Test whether poll_attribute really polls registered attributes."""
......
...@@ -36,6 +36,9 @@ class TestAttributePoller(IsolatedAsyncioTestCase): ...@@ -36,6 +36,9 @@ class TestAttributePoller(IsolatedAsyncioTestCase):
def is_attribute_access_allowed(self, _): def is_attribute_access_allowed(self, _):
return True return True
def is_attribute_polled(self, _):
return False
async def test_is_registered(self): async def test_is_registered(self):
"""Does is_registered() reflect which attributes are registered for polling?""" """Does is_registered() reflect which attributes are registered for polling?"""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment