Skip to content
Snippets Groups Projects
Commit 76c52ec5 authored by Jan David Mol
Browse files

Merge branch 'rollout-v0.28.1' into 'master'

Rollout fixes for v0.28.1

See merge request !852
parents aab8ea26 3588b95b
No related branches found
No related tags found
1 merge request: !852 Rollout fixes for v0.28.1
Showing
with 175 additions and 81 deletions
......@@ -369,6 +369,9 @@ deploy_nomad:
image:
name: hashicorp/levant
entrypoint: [ "" ]
needs:
- docker_build_image
- docker_build_image_device_base
when: manual
rules:
- if: ($CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH) || $CI_COMMIT_TAG
......
......@@ -153,8 +153,9 @@ class TestPowerHierarchyDevice(base.IntegrationTestCase):
self.assertEqual(self.ccd_proxy.state(), DevState.OFF)
# Switch from OFF to HIBERNATE
self.stationmanager_proxy.station_hibernate()
self.assertEqual(self.stationmanager_proxy.station_state_R.name, "HIBERNATE")
self.assertEqual(
self.stationmanager_proxy.last_requested_transition_R, "OFF -> HIBERNATE"
self.stationmanager_proxy.requested_station_state_R.name, "HIBERNATE"
)
self.assertEqual(self.psoc_proxy.state(), DevState.ON)
self.assertEqual(self.pcon_proxy.state(), DevState.ON)
......@@ -173,8 +174,9 @@ class TestPowerHierarchyDevice(base.IntegrationTestCase):
"""
# Switch from OFF to HIBERNATE
self.stationmanager_proxy.station_hibernate()
self.assertEqual(self.stationmanager_proxy.station_state_R.name, "HIBERNATE")
self.assertEqual(
self.stationmanager_proxy.last_requested_transition_R, "OFF -> HIBERNATE"
self.stationmanager_proxy.requested_station_state_R.name, "HIBERNATE"
)
self.assertEqual(self.apspu_h0_proxy.state(), DevState.OFF)
self.assertEqual(self.apspu_l0_proxy.state(), DevState.OFF)
......@@ -186,9 +188,9 @@ class TestPowerHierarchyDevice(base.IntegrationTestCase):
self.assertEqual(self.sdpfirmware_proxy.state(), DevState.OFF)
# Switch from HIBERNATE to STANDBY
self.stationmanager_proxy.station_standby()
self.assertEqual(self.stationmanager_proxy.station_state_R.name, "STANDBY")
self.assertEqual(
self.stationmanager_proxy.last_requested_transition_R,
"HIBERNATE -> STANDBY",
self.stationmanager_proxy.requested_station_state_R.name, "STANDBY"
)
self.assertEqual(self.apspu_h0_proxy.state(), DevState.ON)
self.assertEqual(self.apspu_l0_proxy.state(), DevState.ON)
......@@ -224,9 +226,8 @@ class TestPowerHierarchyDevice(base.IntegrationTestCase):
self.assertEqual(self.sdp_proxy.state(), DevState.OFF)
self.assertEqual(self.antennafield_proxy.state(), DevState.OFF)
self.stationmanager_proxy.station_on()
self.assertEqual(
self.stationmanager_proxy.last_requested_transition_R, "STANDBY -> ON"
)
self.assertEqual(self.stationmanager_proxy.station_state_R.name, "ON")
self.assertEqual(self.stationmanager_proxy.requested_station_state_R.name, "ON")
self.assertEqual(self.sdp_proxy.state(), DevState.ON)
self.assertEqual(self.antennafield_proxy.state(), DevState.ON)
......@@ -259,8 +260,9 @@ class TestPowerHierarchyDevice(base.IntegrationTestCase):
self.stationmanager_proxy.station_on()
# Reverse to STANDBY
self.stationmanager_proxy.station_standby()
self.assertEqual(self.stationmanager_proxy.station_state_R.name, "STANDBY")
self.assertEqual(
self.stationmanager_proxy.last_requested_transition_R, "ON -> STANDBY"
self.stationmanager_proxy.requested_station_state_R.name, "STANDBY"
)
self.assertEqual(self.sdp_proxy.state(), DevState.OFF)
self.assertEqual(self.antennafield_proxy.state(), DevState.OFF)
......@@ -282,9 +284,9 @@ class TestPowerHierarchyDevice(base.IntegrationTestCase):
self.stationmanager_proxy.station_standby()
# Reverse to HIBERNATE
self.stationmanager_proxy.station_hibernate()
self.assertEqual(self.stationmanager_proxy.station_state_R.name, "HIBERNATE")
self.assertEqual(
self.stationmanager_proxy.last_requested_transition_R,
"STANDBY -> HIBERNATE",
self.stationmanager_proxy.requested_station_state_R.name, "HIBERNATE"
)
self.assertEqual(self.apspu_h0_proxy.state(), DevState.OFF)
self.assertEqual(self.apspu_l0_proxy.state(), DevState.OFF)
......
# Copyright (C) 2022 ASTRON (Netherlands Institute for Radio Astronomy)
# SPDX-License-Identifier: Apache-2.0
from enum import Enum
from enum import IntEnum
from typing import Dict, Optional
from functools import wraps
......@@ -36,13 +36,13 @@ POWER_OFF_COMMAND_STATES = OPERATIONAL_STATES + [DevState.STANDBY, DevState.DISA
# -----------------------
class StationState(Enum):
class StationState(IntEnum):
"""Station states enumeration"""
OFF = "OFF"
HIBERNATE = "HIBERNATE"
STANDBY = "STANDBY"
ON = "ON"
OFF = 0
HIBERNATE = 1
STANDBY = 2
ON = 3
# Contains which transitions are allowed for a given state
......@@ -58,6 +58,7 @@ DEVICES_ON_IN_STATION_STATE: Dict[str, Optional[StationState]] = {
"""In which StationState each device class should be switched ON."""
"StationManager": StationState.HIBERNATE,
"CCD": StationState.HIBERNATE,
"EC": StationState.HIBERNATE,
"PCON": StationState.HIBERNATE,
"PSOC": StationState.HIBERNATE,
"TemperatureManager": StationState.HIBERNATE,
......
......@@ -219,7 +219,7 @@ class PowerHierarchyDevice(AbstractHierarchyDevice):
@suppress_exceptions(self.continue_on_failure)
def power_antennas_on(device: DeviceProxy):
# AntennaField: Power on used antennas
if device_class_matches(device, "AntennaField"):
if device_class_matches(device, ("AFL", "AFH")):
logger.info("Powering on %s: Antennas", device)
device.power_hardware_on()
# TODO(JDM): Report which antennas
......@@ -252,7 +252,7 @@ class PowerHierarchyDevice(AbstractHierarchyDevice):
@run_if_device_on_in_station_state(StationState.ON)
def power_off_from_on(device: DeviceProxy):
# AntennaField: Power off all antennas
if device_class_matches(device, "AntennaField"):
if device_class_matches(device, ("AFL", "AFH")):
logger.info("Powering off %s: Antennas", device)
device.power_hardware_off()
# TODO(JDM): Report which antennas
......
......@@ -4,10 +4,9 @@
""" Calibration Device Server for LOFAR2.0
"""
import datetime
import logging
import numpy
from prometheus_client import Counter
from tango import EventType, Database
from tango.server import device_property, command, attribute
from tangostationcontrol.common.calibration import (
......@@ -32,7 +31,7 @@ from tangostationcontrol.devices.antennafield.afl import AFL
from tangostationcontrol.devices.base_device_classes.lofar_device import LOFARDevice
from tangostationcontrol.devices.sdp.firmware import SDPFirmware
from tangostationcontrol.devices.sdp.sdp import SDP
from tangostationcontrol.metrics import device_metrics
from tangostationcontrol.metrics import device_metrics, AttributeMetric, device_labels
logger = logging.getLogger()
__all__ = ["Calibration"]
......@@ -51,13 +50,18 @@ class Calibration(LOFARDevice):
self.hba_proxies: CaseInsensitiveDict = CaseInsensitiveDict()
self.lba_proxies: CaseInsensitiveDict = CaseInsensitiveDict()
self.ant_proxies: CaseInsensitiveDict = CaseInsensitiveDict()
self.last_ant_calibration_timestamp: CaseInsensitiveDict[
str, datetime.datetime | None
] = CaseInsensitiveDict()
# Super must be called after variable assignment due to executing init_device!
super().__init__(cl, name)
self.calibration_count_metric = AttributeMetric(
"calibration_count",
"Number of times calibration has been triggered for each AntennaField device",
device_labels(self),
Counter,
dynamic_labels=["antennafield"],
)
def _calibrate_antenna_field(self, device):
"""Recalibrate a specific AntennaField."""
......@@ -69,11 +73,14 @@ class Calibration(LOFARDevice):
logger.info("Re-calibrate antenna field %s", device)
self.last_ant_calibration_timestamp[device] = datetime.datetime.now()
self.calibrate_recv(device)
self.calibrate_sdp(device)
# get device member in its original casing
antenna_field_name = device.get_name().split("/")[2]
self.calibration_count_metric.get_metric([antenna_field_name]).inc()
@log_exceptions()
def _antennafield_changed_event(self, event):
"""Trigger on key external changes in AntennaField settings."""
......@@ -119,10 +126,14 @@ class Calibration(LOFARDevice):
for k, ant in self.ant_proxies.items():
# Recalibrate associated AntennaFields
sdpfirmware_device = ant.SDPFirmware_device_R
sdp_device = self.sdpfirmware_proxies[sdpfirmware_device].SDP_device_R
if device_name_matches(sdp_device, event.device.name()):
if device_name_matches(sdpfirmware_device, event.device.name()):
self._calibrate_antenna_field(k)
break
else:
logger.warning(
f"Could not find any AntennaField to calibrate for clock change event from {event.device}"
)
# TODO(JDM): While we could read this from our control parent (StationManager),
# doing so creates a deadlock when StationManager wants to initialise this
......@@ -147,16 +158,6 @@ class Calibration(LOFARDevice):
def AntennaFields_Monitored_R(self):
return list(self.ant_proxies.keys())
@attribute(dtype=(numpy.int64,), max_dim_x=20)
def Last_AntennaField_Calibration_Timestamp_R(self):
return numpy.array(
[
ts.timestamp() if ts else 0
for ts in self.last_ant_calibration_timestamp.values()
],
dtype=numpy.int64,
)
@attribute(dtype=(str,), max_dim_x=20)
def SDPs_Monitored_R(self):
return list(self.sdp_proxies.keys())
......@@ -256,7 +257,6 @@ class Calibration(LOFARDevice):
}
for d in devices:
logger.debug("found HBA antenna field device %s", str(d))
self.last_ant_calibration_timestamp[d] = None
devices = db.get_device_exported_for_class(AFL.__name__)
self.lba_proxies = {
......
......@@ -15,6 +15,7 @@ from jsonschema.exceptions import ValidationError
from tango import AttrWriteType, DeviceProxy, DevState, Util
from tango.server import attribute
from tangostationcontrol.common.constants import (
DEFAULT_METRICS_POLLING_PERIOD,
DEFAULT_POLLING_PERIOD,
MAX_ANTENNA,
N_beamlets_ctrl,
......@@ -220,9 +221,9 @@ class ObservationField(LOFARDevice):
def HBA_tile_beam_R(self):
try:
if self._observation_field_settings.HBA.tile_beam is None:
return None
return []
except AttributeError:
return None
return []
pointing_direction = self._observation_field_settings.HBA.tile_beam
return [
......@@ -308,6 +309,10 @@ class ObservationField(LOFARDevice):
self._observation_field_settings.antenna_field,
)
# TODO(JDM): Somehow this does not get configured automatically
# as it does for non-dynamic devices.
self.poll_command("poll_attributes", DEFAULT_METRICS_POLLING_PERIOD)
def configure_for_off(self):
"""Indicate the observation has stopped"""
......
......@@ -54,11 +54,11 @@ class SDP(OPCUADevice):
# Device Properties
# -----------------
# By default, do not enable processing until:
# By default, do not enable processing when:
# * the ring is configured by this device
# * the (number of) beamlet output destinations is configured by the beamlet device
FPGA_processing_enable_RW_default = device_property(
dtype="DevVarBooleanArray", mandatory=False, default_value=[False] * N_pn
dtype="DevVarBooleanArray", mandatory=False, default_value=[True] * N_pn
)
FPGA_ring_node_offset_RW_default = device_property(
......
......@@ -72,13 +72,17 @@ class StationManager(AsyncDevice):
def station_name_R(self):
return self.Station_Name
@attribute(dtype=str, fisallowed="is_attribute_access_allowed")
@attribute(dtype=StationState, fisallowed="is_attribute_access_allowed")
def station_state_R(self):
return self.station_state.name
return self.station_state
@attribute(dtype=str, fisallowed="is_attribute_access_allowed")
def last_requested_transition_R(self):
return self.last_requested_transition or ""
@attribute(dtype=StationState, fisallowed="is_attribute_access_allowed")
def requested_station_state_R(self):
return self.requested_station_state
@attribute(dtype=bool, fisallowed="is_attribute_access_allowed")
def station_state_transitioning_R(self):
return self.transition_lock and self.transition_lock.locked()
@attribute(dtype=(str,), max_dim_x=1024, fisallowed="is_attribute_access_allowed")
def last_requested_transition_exceptions_R(self):
......@@ -98,7 +102,7 @@ class StationManager(AsyncDevice):
def __init__(self, cl, name):
self.station_state = StationState.OFF
self.stationmanager_ph = None
self.last_requested_transition = None
self.requested_station_state = StationState.OFF
self.last_requested_transition_exceptions = []
self.transition_lock = asyncio.Lock()
......@@ -171,7 +175,7 @@ class StationManager(AsyncDevice):
async def _transition(
self,
transition_desc: str,
target_state: StationState,
transition_func: Callable[[], Awaitable[None]],
):
"""Transition to a station state using `transition_func`.
......@@ -187,13 +191,14 @@ class StationManager(AsyncDevice):
)
logger.info(
"Station %s requested to perform the %s Power Sequence",
"Station %s requested to perform the %s -> %s Power Sequence",
self.Station_Name,
transition_desc,
self.station_state.name,
target_state.name,
)
try:
self.last_requested_transition = transition_desc
self.requested_station_state = target_state
self.last_requested_transition_exceptions = await transition_func()
except Exception as ex:
# unsuppressed exception
......@@ -201,9 +206,10 @@ class StationManager(AsyncDevice):
raise
logger.info(
"Station %s has correctly completed the %s Power Sequence",
"Station %s has correctly completed the %s -> %s Power Sequence",
self.Station_Name,
transition_desc,
self.station_state.name,
target_state.name,
)
# --------
......@@ -226,6 +232,7 @@ class StationManager(AsyncDevice):
# not implemented -> call the correct state transition function
# update the station_state variable when successful
self.requested_station_state = StationState.OFF
self.station_state = StationState.OFF
finally:
self.transition_lock.release()
......@@ -247,11 +254,11 @@ class StationManager(AsyncDevice):
try:
if self.station_state == StationState.OFF:
await self._transition(
"OFF -> HIBERNATE", self.stationmanager_ph.off_to_hibernate
StationState.HIBERNATE, self.stationmanager_ph.off_to_hibernate
)
elif self.station_state == StationState.STANDBY:
await self._transition(
"STANDBY -> HIBERNATE",
StationState.HIBERNATE,
self.stationmanager_ph.standby_to_hibernate,
)
except DevFailed as exc:
......@@ -283,12 +290,12 @@ class StationManager(AsyncDevice):
try:
if self.station_state == StationState.HIBERNATE:
await self._transition(
"HIBERNATE -> STANDBY",
StationState.STANDBY,
self.stationmanager_ph.hibernate_to_standby,
)
elif self.station_state == StationState.ON:
await self._transition(
"ON -> STANDBY", self.stationmanager_ph.on_to_standby
StationState.STANDBY, self.stationmanager_ph.on_to_standby
)
except DevFailed as exc:
error_string = f"Station {self.Station_Name} \
......@@ -318,7 +325,7 @@ class StationManager(AsyncDevice):
# call the correct state transition function
try:
await self._transition(
"STANDBY -> ON", self.stationmanager_ph.standby_to_on
StationState.ON, self.stationmanager_ph.standby_to_on
)
except DevFailed as exc:
error_string = f"Station {self.Station_Name} \
......
......@@ -6,7 +6,7 @@ from typing import List
from tango import Attribute, DevState
from tango.server import Device, attribute
from prometheus_client import Enum, Metric
from prometheus_client import Enum
from prometheus_client.core import Info, Counter
from tangostationcontrol import __version__ as version
......@@ -38,22 +38,17 @@ class VersionMetric(AttributeMetric):
class StateMetric(AttributeMetric):
def __init__(self, device: Device):
super().__init__("state", "State of the device.", device.metric_labels, Enum)
self.set_state(device.get_state())
wrap_method(device, device.set_state, self.set_state, post_execute=False)
def make_metric(self) -> Metric:
return Enum(
self.name,
self.description,
labelnames=self.label_keys(),
states=list(DevState.names),
super().__init__(
"state",
"State of the device.",
device.metric_labels,
Enum,
metric_class_init_kwargs={"states": list(DevState.names)},
)
def set_state(self, state):
self.get_metric().state(state.name)
self.set_value(device.get_state())
wrap_method(device, device.set_state, self.set_value, post_execute=False)
class AccessCountMetric(AttributeMetric):
......
......@@ -3,8 +3,10 @@ from tango import AttrWriteType
from tango import CmdArgType
from tango import Attribute
from tango import DevFailed
from prometheus_client import Metric, Gauge, Info
from tango import DevState
from prometheus_client import Metric, Gauge, Info, Enum
from asyncio import iscoroutinefunction
from enum import IntEnum
from typing import List, Dict, Callable, Union
import functools
import logging
......@@ -112,6 +114,8 @@ class AttributeMetric:
description: str,
static_labels: Dict[str, str],
metric_class=Gauge,
metric_class_init_kwargs: Dict[str, object] | None = None,
dynamic_labels: List[str] | None = None,
):
self.name = metric_name(name)
self.description = description
......@@ -120,6 +124,10 @@ class AttributeMetric:
self.static_label_keys = list(static_labels.keys())
self.static_label_values = list(static_labels.values())
self.dynamic_label_keys = dynamic_labels or []
self.metric_class_init_kwargs = metric_class_init_kwargs or {}
if self.name not in METRICS:
METRICS[self.name] = self.make_metric()
......@@ -131,22 +139,30 @@ class AttributeMetric:
def label_keys(self) -> List[str]:
"""Return the list of labels that we will use."""
return self.static_label_keys
return self.static_label_keys + self.dynamic_label_keys
def make_metric(self) -> Metric:
"""Construct a metric that collects samples for this attribute."""
return self.metric_class(
self.name, self.description, labelnames=self.label_keys()
self.name,
self.description,
labelnames=self.label_keys(),
**self.metric_class_init_kwargs,
)
def get_metric(self, extra_labels: List = None) -> Metric:
def get_metric(self, dynamic_label_values: List = None) -> Metric:
"""Return the metric that uses the default labels."""
return self.metric.labels(*self.static_label_values, *(extra_labels or []))
return self.metric.labels(
*self.static_label_values, *(dynamic_label_values or [])
)
def set_value(self, value: object):
"""A new value for the attribute is known. Feed it to the metric."""
# set it, this class will take care of the default labels
if self.metric_class == Enum:
self._enum_value(value, self.static_label_values)
else:
self._set_value(value, self.static_label_values)
def _set_value(self, value: object, labels: List[str]):
......@@ -157,6 +173,12 @@ class AttributeMetric:
assert self.metric_class == Info
self.metric.labels(*labels).info(value)
def _enum_value(self, value: str | IntEnum, labels: List[str]):
assert self.metric_class == Enum
self.metric.labels(*labels).state(
value.name if isinstance(value, (DevState, IntEnum)) else value
)
def collect(self) -> List[Metric]:
"""Return all collected samples."""
return self.metric.collect()
......@@ -183,6 +205,19 @@ class ScalarAttributeMetric(AttributeMetric):
if self.data_type == CmdArgType.DevString:
super().__init__(attribute.get_name(), description, static_labels, Info)
elif self.data_type == CmdArgType.DevEnum:
# evil PyTango foo to obtain enum labels from class attribute
enum_labels = getattr(
device.__class__, attribute.get_name()
).att_prop.enum_labels.split(",")
super().__init__(
attribute.get_name(),
description,
static_labels,
Enum,
metric_class_init_kwargs={"states": enum_labels},
)
else:
super().__init__(attribute.get_name(), description, static_labels)
......
......@@ -10,6 +10,7 @@ from tango.server import (
from tango.test_context import DeviceTestContext
from prometheus_client import generate_latest
from prometheus_client.registry import REGISTRY
from enum import IntEnum
from typing import Dict
import asyncio
import numpy
......@@ -107,6 +108,10 @@ class TestMetrics(base.TestCase):
def test_scalar_attribute_metric(self):
"""Test ScalarAttributeMetric"""
class MyEnum(IntEnum):
ZERO = 0
ONE = 1
class test_device(Device):
float_attr = attribute(
doc="docstr",
......@@ -120,10 +125,17 @@ class TestMetrics(base.TestCase):
fget=lambda obj: "foo",
)
enum_attr = attribute(
doc="docstr",
dtype=MyEnum,
fget=lambda obj: MyEnum.ONE,
)
def init_device(self):
# create an attribute metric and assign a value
self.float_metric = ScalarAttributeMetric(self, self.float_attr)
self.str_metric = ScalarAttributeMetric(self, self.str_attr)
self.enum_metric = ScalarAttributeMetric(self, self.enum_attr)
@command()
def test(device):
......@@ -166,10 +178,44 @@ class TestMetrics(base.TestCase):
metric.samples[0].labels,
)
# check collected metrics (enum_attr)
metric = device.enum_metric.metric.collect()[0]
self.assertEqual("ds_enum_attr", metric.name)
self.assertEqual("docstr", metric.documentation)
# check labels as the DeviceTestContext would result in
self.assertDictEqual(
{
"domain": "test",
"family": "nodb",
"member": "test_device",
"device_class": "test_device",
"access": "r",
"ds_enum_attr": "ZERO",
},
metric.samples[0].labels,
)
self.assertEqual(0, metric.samples[0].value)
self.assertDictEqual(
{
"domain": "test",
"family": "nodb",
"member": "test_device",
"device_class": "test_device",
"access": "r",
"ds_enum_attr": "ONE",
},
metric.samples[1].labels,
)
self.assertEqual(1, metric.samples[1].value)
with DeviceTestContext(test_device, process=False) as proxy:
# access the attribute to trigger value propagation to metric
_ = proxy.float_attr
_ = proxy.str_attr
_ = proxy.enum_attr
proxy.test()
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment