diff --git a/.gitignore b/.gitignore index 0c7683c542829bb28924439439d52f72dfa29c17..7d770d82fca276ee7c371b822e2bf639eb92a4c1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Coverage generations cover .coverage +.coverage.* coverage.xml # IDE files diff --git a/VERSION b/VERSION index c43e1055fd3f2e00540c298404934e8097459098..34a83616bb5aa9a70c5713bc45cd45498a50ba24 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.12 +0.12.1 diff --git a/lofar_station_client/file_access/README.md b/lofar_station_client/file_access/README.md index 07f7e42201b3deed3ac1c7274ca865403c864a25..ca3d33368b0c730dfea3ffe5ace5911afbb3cd84 100644 --- a/lofar_station_client/file_access/README.md +++ b/lofar_station_client/file_access/README.md @@ -2,13 +2,16 @@ ## Define a model -The data structure of the HDF file is defined by python objects using decorators. Currently, there are two decorators available: +The data structure of the HDF file is defined by python objects using decorators. Currently, there are two decorators +available: + 1. `member`: defines a class property to be an HDF group or dataset depending on the type. 2. `attribute`: defines a class property to be an HDF attribute on a group or dataset. ### Dataset definition A basic data structure to define the HDF file looks like this: + ```python class Data: list_of_ints: List[int] = member() @@ -16,18 +19,23 @@ class Data: numpy_array: ndarray = member() ``` -It is important to always use type hints. It not only makes the classes more self-explanatory during development it is also +It is important to always use type hints. It not only makes the classes more self-explanatory during development it is +also important for the file reader to guesstimate the right action to perform. In this first example we only used arrays and lists. These types always map to a dataset within HDF. 
By default, -the reader is looking for a dataset with the name of the variable, if the dataset is named differently it can be overwritten -by specifying the `name` parameter: `member(name='other_name_then_variable')`. Also, all members are required by default. -If they don't appear in the HDF file an error is thrown. This behavior can be changed by specifying the `optional` parameter: -`member(optional=False)`. +the reader is looking for a dataset with the name of the variable, if the dataset is named differently it can be +overwritten +by specifying the `name` parameter: `member(name='other_name_then_variable')`. Also, all members are required by +default. +If they don't appear in the HDF file an error is thrown. This behavior can be changed by specifying the `optional` +parameter: +`member(optional=True)`. ### Group definition HDF supports to arrange the data in groups. Groups can be defined as additional classes: + ```python class SubGroup: list_of_ints: List[int] = member() @@ -40,28 +48,33 @@ class Data: Additionally, all additional settings apply in the same way as they do for datasets. ### Dictionaries -A special case is the `dict`. It allows to read a set of groups or datasets using the name of the group or dataset as the key. + +A special case is the `dict`. It allows to read a set of groups or datasets using the name of the group or dataset as +the key. ```python class Data: data_dict: Dict[str, List[int]] = member() ``` - ### Attribute definition Attributes in a HDF file can appear on groups as well as on datasets and can be defined by using `attribute()`: + ```python class Data: - first_attr: str = attribute() + an_attr: str = attribute() ``` -The file reader will look for a attribute with the name first_attr on the group that is represented by the class `Data`. -The name of the attribute can be overwritten by specifying the `name` parameter: `attribute(name='other_name')`. 
All attributes -are required by default and will cause an exception to be thrown if they are not available. This behavior can be changed by specifying the `optional` parameter: -`attribute(optional=False)`. +The file reader will look for an attribute with the name `an_attr` on the group that is represented by the class `Data`. +The name of the attribute can be overwritten by specifying the `name` parameter: `attribute(name='other_name')`. All +attributes +are required by default and will cause an exception to be thrown if they are not available. This behavior can be changed +by specifying the `optional` parameter: +`attribute(optional=True)`. -In HDF also datasets can contain attributes. Since they are usually mapped to primitive types it would not be possible to access +In HDF also datasets can contain attributes. Since they are usually mapped to primitive types it would not be possible +to access these attributes. Therefor `attribute` allows to specify another member in the class by setting `from_member`. ## Read a HDF file @@ -70,5 +83,33 @@ A file can be read using `read_hdf5`: ```python with read_hdf5('file_name.h5', Data) as data: - a = data.first_attr + a = data.an_attr +``` + +## Create a HDF file + +A file can be created using `create_hdf5` - existing files will be overwritten: + +```python +with create_hdf5('file_name.h5', Data) as data: + data.an_attr = "data" ``` +## Change a HDF file + +A file can be changed using `open_hdf5` - the file must exist: + +```python +with open_hdf5('file_name.h5', Data) as data: + data.an_attr = "new value" +``` + +## Data write behaviour + +### members +All changes to members of the object are immediately written to the underlying HDF file. Therefore, altering the object +should be minimized to have no performance degradation. + +### attributes +Attributes are written if `flush()` is invoked on the `FileWriter` or when the `with` scope is exited. 
This behaviour is +necessary because attributes depend on the underlying members. Therefore, the attributes can only be written after +the members. diff --git a/lofar_station_client/file_access/__init__.py b/lofar_station_client/file_access/__init__.py index 661011d5f68854681f05fc8194dd1da01c8459e9..d916df1d068fdf460e55a0914f9f1115afba420e 100644 --- a/lofar_station_client/file_access/__init__.py +++ b/lofar_station_client/file_access/__init__.py @@ -6,8 +6,19 @@ Contains classes to interact with (hdf5) files """ -from ._hdf5_attribute_def import attribute -from ._hdf5_member_def import member -from .hdf_file_reader import Hdf5FileReader, read_hdf5 +from ._attribute_def import attribute +from ._member_def import member +from ._readers import FileReader +from .hdf._hdf_readers import read_hdf5 +from .hdf._hdf_writers import open_hdf5, create_hdf5 +from ._writers import FileWriter -__all__ = ["Hdf5FileReader", "attribute", "member", "read_hdf5"] +__all__ = [ + "FileReader", + "FileWriter", + "attribute", + "member", + "read_hdf5", + "open_hdf5", + "create_hdf5", +] diff --git a/lofar_station_client/file_access/_attribute_def.py b/lofar_station_client/file_access/_attribute_def.py new file mode 100644 index 0000000000000000000000000000000000000000..6c986fdff97227366d6984351f1eec68857731a3 --- /dev/null +++ b/lofar_station_client/file_access/_attribute_def.py @@ -0,0 +1,66 @@ +# Copyright (C) 2022 ASTRON (Netherlands Institute for Radio Astronomy) +# SPDX-License-Identifier: Apache-2.0 + +""" +Contains HDF5 specific classes and methods to define class members as an HDF attribute +""" +from typing import Any + +from ._readers import DataReader +from ._writers import DataWriter + + +def attribute(name: str = None, optional: bool = False, from_member: str = None): + """ + Define a class member as an attribute within a HDF5 file + """ + return AttributeDef(name, optional, from_member) + + +# pylint: disable=too-few-public-methods +class AttributeDef: + """ + Decorator to 
extract attributes of HDF5 groups and datasets to pythonic objects + """ + + def __init__(self, name: str, optional: bool, from_member: str = None): + self.name = name + self.from_member = from_member + self.optional = optional + self.owner: Any + + def __set_name__(self, owner, name): + if self.name is None: + self.name = name + self.owner = owner + + def __set__(self, instance, value): + setattr(instance, self.attr_name, value) + + if hasattr(instance, "_data_writer"): + writer: DataWriter = getattr(instance, "_data_writer") + writer.write_attribute( + instance, self.name, self.owner, self.from_member, self.optional, value + ) + + def __get__(self, instance, obj_type=None): + if hasattr(instance, self.attr_name): + return getattr(instance, self.attr_name) + + if hasattr(instance, "_data_reader"): + reader: DataReader = getattr(instance, "_data_reader") + attr = reader.read_attribute( + self.name, self.owner, self.from_member, self.optional + ) + setattr(instance, self.attr_name, attr) + return attr + return None + + @property + def attr_name(self): + """ + Name used to store the value in the owning object + """ + if self.from_member is None: + return f"_a_{self.name}" + return f"_a_{self.from_member}_{self.name}" diff --git a/lofar_station_client/file_access/_compat_utils.py b/lofar_station_client/file_access/_compat_utils.py index 718d9642f974d5f7da83105a8922c0263bda47ff..00f3c5922dae228821f5c7a00d904d59cb8de74e 100644 --- a/lofar_station_client/file_access/_compat_utils.py +++ b/lofar_station_client/file_access/_compat_utils.py @@ -5,11 +5,9 @@ # pylint: skip-file """ -Contains required methods missing in pyton 3.7. Will be obsolete as soon as python 3.7 -goes eol. +Contains required methods missing in older python versions. 
""" - try: # Python >=3.8 should have these functions already from typing import get_args @@ -34,3 +32,71 @@ except ImportError: res = (list(res[:-1]), res[-1]) return res return () + + +try: + from inspect import get_annotations +except ImportError: + + import sys + import types + import functools + + def get_annotations(obj, *, globals=None, locals=None, eval_str=False): + """ + Simplified copy from the Python 3.10 inspect module, only supporting + get_annotation on types. + https://github.com/python/cpython/blob/3.10/Lib/inspect.py + """ + # class + obj_dict = getattr(obj, "__dict__", None) + if obj_dict and hasattr(obj_dict, "get"): + ann = obj_dict.get("__annotations__", None) + if isinstance(ann, types.GetSetDescriptorType): + ann = None + else: + ann = None + + obj_globals = None + module_name = getattr(obj, "__module__", None) + if module_name: + module = sys.modules.get(module_name, None) + if module: + obj_globals = getattr(module, "__dict__", None) + obj_locals = dict(vars(obj)) + unwrap = obj + + if ann is None: + return {} + + if not isinstance(ann, dict): + raise ValueError(f"{obj!r}.__annotations__ is neither a dict nor None") + + if not ann: + return {} + + if not eval_str: + return dict(ann) + + if unwrap is not None: + while True: + if hasattr(unwrap, "__wrapped__"): + unwrap = unwrap.__wrapped__ + continue + if isinstance(unwrap, functools.partial): + unwrap = unwrap.func + continue + break + if hasattr(unwrap, "__globals__"): + obj_globals = unwrap.__globals__ + + if globals is None: + globals = obj_globals + if locals is None: + locals = obj_locals + + return_value = { + key: value if not isinstance(value, str) else eval(value, globals, locals) + for key, value in ann.items() + } + return return_value diff --git a/lofar_station_client/file_access/_hdf5_attribute_def.py b/lofar_station_client/file_access/_hdf5_attribute_def.py deleted file mode 100644 index 17bf8f6633398be0cb2cc1ff77a7a586ced7529c..0000000000000000000000000000000000000000 --- 
a/lofar_station_client/file_access/_hdf5_attribute_def.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (C) 2022 ASTRON (Netherlands Institute for Radio Astronomy) -# SPDX-License-Identifier: Apache-2.0 - -""" -Contains HDF5 specific classes and methods to define class members as an HDF attribute -""" -from inspect import getattr_static -from typing import Any - -from ._hdf5_member_def import Hdf5MemberDef - - -def attribute(name: str = None, optional: bool = False, from_member: str = None): - """ - Define a class member as an attribute within a HDF5 file - """ - return Hdf5AttributeDef(name, optional, from_member) - - -# pylint: disable=too-few-public-methods -class Hdf5AttributeDef: - """ - Decorator to extract attributes of HDF5 groups and datasets to pythonic objects - """ - - def __init__(self, name: str, optional: bool, from_member: str = None): - self.name = name - self.from_member = from_member - self.optional = optional - self.cache = None - self.owner: Any - - def __set_name__(self, owner, name): - if self.name is None: - self.name = name - self.owner = owner - - def __get__(self, obj, obj_type=None): - if self.cache is not None: - return self.cache - - attrs = self._resolve_attrs(obj) - - if self.name not in attrs: - if self.optional: - return None - raise KeyError(f"Could not find required attribute key {self.name}") - - self.cache = attrs[self.name] - return self.cache - - def _resolve_attrs(self, obj): - data = getattr(obj, "_data") - - if self.from_member is None: - return data.attrs - - member: Hdf5MemberDef = getattr_static(self.owner, self.from_member) - return data[member.name].attrs diff --git a/lofar_station_client/file_access/_hdf5_member_def.py b/lofar_station_client/file_access/_hdf5_member_def.py deleted file mode 100644 index d7f67e6ff4a75b45c3788ab5e169e52f6bb1836c..0000000000000000000000000000000000000000 --- a/lofar_station_client/file_access/_hdf5_member_def.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2022 ASTRON (Netherlands 
Institute for Radio Astronomy) -# SPDX-License-Identifier: Apache-2.0 - -""" -Contains HDF5 specific classes and methods to define class members as members -of HDF5 files -""" - -from typing import Type - -from ._hdf5_utils import _detect_reader -from ._utils import _extract_type - - -def member(name: str = None, optional: bool = False): - """ - Define a class member as a member of a HDF5 file - """ - return Hdf5MemberDef(name, optional) - - -# pylint: disable=too-few-public-methods -class Hdf5MemberDef: - """ - Decorator to handle the transformation of HDF5 groups - and datasets to pythonic objects - """ - - def __init__(self, name: str, optional: bool): - self.name = name - self.optional = optional - self.cache = None - self.attr_name: str - self.type: Type - - def __set_name__(self, owner, name): - self.attr_name = name - if self.name is None: - self.name = name - self.type = _extract_type(owner, name) - - def __get__(self, obj, obj_type=None): - if self.cache is not None: - return self.cache - - data = getattr(obj, "_data") - - if self.name not in data: - if self.optional: - return None - raise KeyError(f"Could not find required key {self.name}") - - reader = _detect_reader(self.type) - self.cache = reader(data[self.name]) - return self.cache diff --git a/lofar_station_client/file_access/_hdf5_utils.py b/lofar_station_client/file_access/_hdf5_utils.py deleted file mode 100644 index 8748f2383f7ccd67b77f3938cc4ae42700519e91..0000000000000000000000000000000000000000 --- a/lofar_station_client/file_access/_hdf5_utils.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (C) 2022 ASTRON (Netherlands Institute for Radio Astronomy) -# SPDX-License-Identifier: Apache-2.0 - -""" -Utils to handle transformation of HDF5 specific classes to pythonic objects -""" -from collections.abc import MutableMapping -from typing import Type, TypeVar, Dict - -from numpy import zeros, ndarray - -from ._compat_utils import get_origin -from ._utils import _extract_base_type - -T = TypeVar("T") 
- - -def _assert_is_dataset(value): - if issubclass(type(value), MutableMapping): - raise TypeError( - f"Only <Dataset> can be mappet do primitive type while " - f"value is of type <{type(value).__name__}>" - ) - - -def _assert_is_group(value): - if not issubclass(type(value), MutableMapping): - raise TypeError( - "Only Group can be mapped to <object> while value" - f" is of type <{type(value).__name__}>" - ) - - -def _read_object(target_type: Type[T], value) -> T: - _assert_is_group(value) - obj = target_type() - setattr(obj, "_data", value) - return obj - - -def _read_list(value): - _assert_is_dataset(value) - return value[:] - - -def _read_ndarray(value): - _assert_is_dataset(value) - nd_value = zeros(value.shape, value.dtype) - # convert the data set to a numpy array - value.read_direct(nd_value) - return nd_value - - -def _read_dict(target_type: Type[T], value, dict_type) -> Dict[str, T]: - result = dict_type() - reader = _detect_reader(target_type) - for k in value.keys(): - result[k] = reader(value[k]) - if dict_type is not dict: - setattr(result, "_data", value) - return result - - -def _detect_reader(target_type): - origin_type = get_origin(target_type) - if origin_type is dict: - return lambda value: _read_dict(_extract_base_type(target_type), value, dict) - if get_origin(target_type) is list: - return _read_list - if target_type is ndarray: - return _read_ndarray - if issubclass(target_type, dict): - return lambda value: _read_dict( - _extract_base_type(target_type), value, target_type - ) - return lambda value: _read_object(target_type, value) diff --git a/lofar_station_client/file_access/_member_def.py b/lofar_station_client/file_access/_member_def.py new file mode 100644 index 0000000000000000000000000000000000000000..05d079cfdf7229c0ac1e9641b09c4fdbd74333a2 --- /dev/null +++ b/lofar_station_client/file_access/_member_def.py @@ -0,0 +1,65 @@ +# Copyright (C) 2022 ASTRON (Netherlands Institute for Radio Astronomy) +# SPDX-License-Identifier: Apache-2.0 
def member(name: str = None, optional: bool = False):
    """
    Define a class member as a member of a HDF5 file
    """
    return MemberDef(name, optional)


# pylint: disable=too-few-public-methods
class MemberDef:
    """
    Descriptor translating attribute access on the owning object into reads
    and writes of the matching HDF5 group or dataset.
    """

    def __init__(self, name: str, optional: bool):
        self.name = name
        self.optional = optional
        self.type: Type

    def __set_name__(self, owner, name):
        # fall back to the python attribute name when no explicit name is given
        if self.name is None:
            self.name = name
        self.type = _extract_type(owner, name)

    def __get__(self, instance, obj_type=None):
        if hasattr(instance, "_data_reader"):
            return getattr(instance, "_data_reader").read_member(
                instance, self.name, self.type, self.optional
            )
        # detached objects serve the locally stored value (or None)
        return getattr(instance, self.attr_name, None)

    def __set__(self, instance, value):
        if not hasattr(instance, "_data_writer"):
            setattr(instance, self.attr_name, value)
            return

        getattr(instance, "_data_writer").write_member(self.name, self.type, value)

        # drop any stale local copy once the value lives in the file
        if hasattr(instance, self.attr_name):
            delattr(instance, self.attr_name)

    @property
    def attr_name(self):
        """
        Name used to store the value in the owning object
        """
        return f"_v_{self.name}"
class MonitoredWrapper:
    """
    A proxy around a container that forwards item access and method calls to
    the wrapped instance and invokes the registered event handler after every
    item assignment or method invocation.
    """

    def __init__(self, event, instance):
        self._event = event
        self._instance = instance

    def __setitem__(self, key, value):
        self._instance[key] = value
        self._event(self._instance)

    def __getitem__(self, item):
        return self._instance[item]

    def __getattribute__(self, name):
        # the two bookkeeping slots are served from the wrapper itself
        if name in ("_instance", "_event"):
            return object.__getattribute__(self, name)

        target = object.__getattribute__(self._instance, name)
        if not callable(target):
            return target

        def notify_after(*args, **kwargs):
            # run the wrapped method first, then report the (possibly
            # mutated) instance to the event handler
            outcome = target(*args, **kwargs)
            self._event(self._instance)
            return outcome

        return notify_after
def _wrap(target_type, value, callback):
    """
    Wrap *value* in a MonitoredWrapper when *target_type* maps to a mutable
    container (dict/list origin, ndarray, or a dict subclass) so that changes
    can be reported through *callback*; all other values pass through as-is.

    Fix: ``issubclass`` raises TypeError when handed a non-class typing
    construct (e.g. ``Optional[int]``); guard with ``isinstance(..., type)``
    so such types fall through to the pass-through branch instead of crashing.
    """
    origin_type = get_origin(target_type)
    if origin_type in (dict, list):
        return MonitoredWrapper(callback, value)
    if target_type is ndarray:
        return MonitoredWrapper(callback, value)
    if isinstance(target_type, type) and issubclass(target_type, dict):
        return MonitoredWrapper(callback, value)
    return value
T = TypeVar("T")


class FileWriter(FileReader[T], ABC):
    """
    Abstract file writer.

    Depending on the ``create`` flag handed to the constructor, entering the
    context manager either creates a new file or opens an existing one.
    """

    def __init__(self, create):
        # True: __enter__ creates the file; False: __enter__ opens an existing one
        self._create = create

    @abstractmethod
    def create(self) -> T:
        """
        Create the object representing the file
        """

    @abstractmethod
    def open(self) -> T:
        """
        Open the object representing the existing file
        """

    def __enter__(self):
        if self._create:
            return self.create()
        return self.open()


class DataWriter(DataReader, ABC):
    """
    Abstract data writer
    """

    @abstractmethod
    def write_member(self, name: str, target_type, value):
        """
        Write given member to underlying file
        """

    @abstractmethod
    # pylint: disable=too-many-arguments
    def write_attribute(self, instance, name, owner, from_member, optional, value):
        """
        Write given attribute to underlying file
        """
T = TypeVar("T")


def _assert_is_dataset(value):
    """
    Raise TypeError when *value* is a group (MutableMapping): only HDF5
    datasets can be mapped onto primitive python types.
    """
    if issubclass(type(value), MutableMapping):
        # fix: error message typo "mappet do" -> "mapped to"
        raise TypeError(
            f"Only <Dataset> can be mapped to primitive type while "
            f"value is of type <{type(value).__name__}>"
        )


def _assert_is_group(value):
    """
    Raise TypeError unless *value* is a group (MutableMapping).
    """
    if not issubclass(type(value), MutableMapping):
        raise TypeError(
            "Only Group can be mapped to <object> while value"
            f" is of type <{type(value).__name__}>"
        )


def _write_ndarray(data, key, value):
    """
    Replace (or create) dataset *key* in group *data* with *value*.
    """
    _assert_is_group(data)
    if key in data:
        # datasets cannot be resized in place; delete and recreate
        _assert_is_dataset(data[key])
        del data[key]

    data.create_dataset(key, data=value)


def _is_attachable(target_type: Type[T]):
    """
    Return True when *target_type* maps to an HDF5 group (a regular object),
    False for dataset-like types (dict, list, ndarray).
    """
    origin_type = get_origin(target_type)
    if origin_type is dict:
        return False
    if origin_type is list:
        return False
    if target_type is ndarray:
        return False
    return True


def _attach_object(target_type: Type[T], instance):
    """
    Re-apply locally cached values through the class descriptors of
    *target_type* after *instance* has been attached to a writer.
    """
    # NOTE(review): assumes every annotated attribute of target_type is a
    # member()/attribute() descriptor exposing .attr_name/.name — confirm
    annotations = get_annotations(target_type)
    for annotation in annotations:
        attr = inspect.getattr_static(target_type, annotation)
        if hasattr(instance, attr.attr_name):
            setattr(instance, attr.name, getattr(instance, attr.attr_name))
T = TypeVar("T")


class HdfFileReader(FileReader[T]):
    """
    HDF5 specific file reader
    """

    def __init__(self, name, target_type):
        # None until _open_file succeeds; close() treats falsy as "not closed yet"
        self._is_closed = None
        self._target_type = target_type
        self._open_file(name)

    def _open_file(self, name):
        # read-only here; HdfFileWriter overrides this with write modes
        self._hdf5_file = h5py.File(name, "r")
        self._is_closed = False

    def read(self) -> T:
        """
        Read the opened file into a pythonic representation specified by
        target_type. Will automatically figure out if target_type is a dict
        or a regular object
        """
        reader = HdfDataReader.detect_reader(
            self._target_type, HdfDataReader(self, self._hdf5_file)
        )
        return reader(self._hdf5_file)

    def close(self):
        """
        Close the underlying HDF file
        """
        if not self._is_closed:
            self._is_closed = True
            # fix: close the handle explicitly — deleting the reference alone
            # does not close the file while data readers still reference it
            hdf5_file = getattr(self, "_hdf5_file", None)
            if hdf5_file is not None:
                hdf5_file.close()
                del self._hdf5_file


class HdfDataReader(DataReader):
    """
    HDF data reader
    """

    def __init__(self, file_reader: HdfFileReader, data):
        self.file_reader = file_reader
        self.data = data

    def read_member(self, obj, name, target_type, optional):
        """
        Read member *name* from the underlying group; None for missing
        optional members, KeyError for missing required ones.
        """
        if name not in self.data:
            if optional:
                return None
            raise KeyError(f"Could not find required key {name}")

        reader = self.detect_reader(
            target_type, self.__class__(self.file_reader, self.data[name])
        )
        return reader(self.data[name])

    def read_attribute(self, name, owner, from_member, optional):
        """
        Read HDF5 attribute *name*, either from this group or — when
        *from_member* is given — from the dataset backing that member.
        """
        attrs: dict
        if from_member is None:
            attrs = self.data.attrs
        else:
            member = getattr_static(owner, from_member)
            attrs = self.data[member.name].attrs

        if name not in attrs:
            if optional:
                return None
            raise KeyError(f"Could not find required attribute key {name}")

        return attrs[name]

    @classmethod
    def _read_object(
        cls, target_type: Type[T], value, file_reader: "HdfDataReader"
    ) -> T:
        # groups map to regular objects that carry a reader for lazy access
        _assert_is_group(value)
        obj = target_type()
        setattr(obj, "_data_reader", cls(file_reader.file_reader, value))
        return obj

    @staticmethod
    def _read_list(value):
        _assert_is_dataset(value)
        return list(value[:])

    @staticmethod
    def _read_ndarray(value):
        _assert_is_dataset(value)
        nd_value = zeros(value.shape, value.dtype)
        # convert the data set to a numpy array
        value.read_direct(nd_value)
        return nd_value

    @classmethod
    def _read_dict(
        cls, target_type: Type[T], value, dict_type, data_reader: "HdfDataReader"
    ) -> Dict[str, T]:
        result = dict_type()
        reader = cls.detect_reader(target_type, data_reader)
        for k in value.keys():
            result[k] = reader(value[k])
        if dict_type is not dict:
            # custom dict types keep a reader for nested lazy access
            setattr(result, "_data_reader", cls(data_reader.file_reader, value))
        return result

    @classmethod
    def detect_reader(cls, target_type, data_reader: "HdfDataReader"):
        """
        Detect the required reader based on expected type
        """
        origin_type = get_origin(target_type)
        if origin_type is dict:
            return lambda value: cls._read_dict(
                _extract_base_type(target_type), value, dict, data_reader
            )
        if origin_type is list:
            return cls._read_list
        if target_type is ndarray:
            return cls._read_ndarray
        # guard: issubclass raises TypeError for non-class typing constructs
        if isinstance(target_type, type) and issubclass(target_type, dict):
            return lambda value: cls._read_dict(
                _extract_base_type(target_type), value, target_type, data_reader
            )
        return lambda value: cls._read_object(target_type, value, data_reader)


def read_hdf5(name: str, target_type: Type[T]) -> FileReader[T]:
    """
    Open a HDF5 file by name/path
    """
    return HdfFileReader[T](name, target_type)
import getattr_static +from typing import TypeVar, Type, Dict + +import h5py +from numpy import ndarray + +from ._hdf5_utils import ( + _is_attachable, + _attach_object, + _write_ndarray, + _assert_is_group, +) +from ._hdf_readers import HdfFileReader, HdfDataReader +from .._writers import FileWriter, DataWriter +from .._utils import _wrap, _extract_base_type +from .._compat_utils import get_origin + +T = TypeVar("T") + + +class HdfFileWriter(HdfFileReader[T], FileWriter[T]): + """ + HDF5 specific file writer + """ + + def __init__(self, name, target_type, create): + self._create = create + self.writers: list[HdfDataWriter] = [] + super().__init__(name, target_type) + + def _open_file(self, name): + self._hdf5_file = h5py.File(name, "w" if self._create else "a") + self._is_closed = False + + def flush(self): + """ + Flush all registered writers + """ + for writer in self.writers: + writer.flush() + self.writers = [] + + def close(self): + self.flush() + super().close() + + def open(self) -> T: + return self.create() + + def create(self) -> T: + """ + Create the object representing the HDF file + """ + data_writer = HdfDataWriter(self, self._hdf5_file) + reader = HdfDataWriter.detect_reader(self._target_type, data_writer) + obj = reader(self._hdf5_file) + if isinstance(obj, dict): + obj = _wrap( + self._target_type, + obj, + lambda value: HdfDataWriter.write_dict( + _extract_base_type(self._target_type), + self._hdf5_file, + value, + data_writer, + ), + ) + setattr(obj, "_data_writer", data_writer) + return obj + + +class HdfDataWriter(HdfDataReader, DataWriter): + """ + HDF data writer + """ + + def read_member(self, obj, name, target_type, optional): + instance = super().read_member(obj, name, target_type, optional) + + return _wrap(target_type, instance, lambda a: setattr(obj, name, a)) + + @classmethod + def _read_dict( + cls, target_type: Type[T], value, dict_type, data_reader: "HdfDataWriter" + ) -> Dict[str, T]: + obj = super()._read_dict(target_type, value, 
dict_type, data_reader) + if dict_type is not dict: + setattr(obj, "_data_writer", cls(data_reader.file_writer, value)) + return obj + + @classmethod + def _read_object( + cls, target_type: Type[T], value, file_reader: "HdfDataWriter" + ) -> T: + obj = super()._read_object(target_type, value, file_reader) + setattr(obj, "_data_writer", cls(file_reader.file_writer, value)) + return obj + + def __init__(self, file_writer: HdfFileWriter, data): + self.file_writer = file_writer + self.file_writer.writers.append(self) + self.data = data + self.write_actions = [] + super().__init__(file_writer, data) + + def write_member(self, name: str, target_type: Type[T], value): + data = self.data + writer = self.detect_writer(target_type, self) + writer(data, name, value) + + if _is_attachable(target_type): + _attach_object(target_type, value) + + def flush(self): + """ + Executed all pending write actions + """ + for action in self.write_actions: + action() + + # pylint: disable=too-many-arguments + def write_attribute(self, instance, name, owner, from_member, optional, value): + self.write_actions.append( + lambda: self._write_attribute(name, owner, from_member, value) + ) + + def _write_attribute(self, name, owner, from_member, value): + attrs = self._resolve_attrs(owner, from_member) + attrs[name] = value + + def _resolve_attrs(self, owner, from_member): + """ + Finds the right attribute to write into + """ + if from_member is None: + return self.data.attrs + + member = getattr_static(owner, from_member) + return self.data[member.name].attrs + + @classmethod + def detect_writer(cls, target_type, data_writer: "HdfDataWriter"): + """ + Detect required writer based on expected type + """ + origin_type = get_origin(target_type) + if origin_type is dict: + return lambda data, key, value: cls._write_dict_group( + _extract_base_type(target_type), data, key, value, data_writer + ) + if get_origin(target_type) is list: + return _write_ndarray + if target_type is ndarray: + return 
_write_ndarray + if issubclass(target_type, dict): + return lambda data, key, value: cls._write_dict_group( + _extract_base_type(target_type), data, key, value, data_writer + ) + return lambda data, key, value: cls._write_object( + target_type, data, key, value, data_writer + ) + + @classmethod + # pylint: disable=too-many-arguments + def _write_dict_group( + cls, target_type: Type[T], data, key, value, data_writer: "HdfDataWriter" + ): + _assert_is_group(data) + if key not in data: + data.create_group(key) + cls.write_dict( + target_type, data[key], value, cls(data_writer.file_writer, data[key]) + ) + + @classmethod + def write_dict( + cls, target_type: Type[T], data, value, data_writer: "HdfDataWriter" + ): + """ + Write given dictionary to given data group + """ + _assert_is_group(data) + for k in data.keys(): + if k not in value: + del data[k] + writer = HdfDataWriter.detect_writer(target_type, data_writer) + for k in value.keys(): + writer(data, k, value[k]) + + @classmethod + # pylint: disable=too-many-arguments + def _write_object( + cls, target_type: Type[T], data, key, value: T, data_writer: "HdfDataWriter" + ): + _assert_is_group(data) + if key in data: + _assert_is_group(data[key]) + else: + data.create_group(key) + data_writer = cls(data_writer.file_writer, data[key]) + setattr(value, "_data_writer", data_writer) + setattr(value, "_data_reader", data_writer) + _attach_object(target_type, value) + + +def open_hdf5(name: str, target_type: Type[T]) -> FileWriter[T]: + """ + Open a HDF5 file by name/path + """ + return HdfFileWriter[T](name, target_type, False) + + +def create_hdf5(name: str, target_type: Type[T]) -> FileWriter[T]: + """ + Create a HDF5 file by name/path + """ + return HdfFileWriter[T](name, target_type, True) diff --git a/lofar_station_client/file_access/hdf_file_reader.py b/lofar_station_client/file_access/hdf_file_reader.py deleted file mode 100644 index a98a4ccf1aca44591594e4552a9dbaa1dcdc57f1..0000000000000000000000000000000000000000 
--- a/lofar_station_client/file_access/hdf_file_reader.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (C) 2022 ASTRON (Netherlands Institute for Radio Astronomy) -# SPDX-License-Identifier: Apache-2.0 - -""" -Contains classes to handle file reading -""" - -from typing import TypeVar, Generic - -import h5py - -from ._hdf5_utils import _detect_reader - -T = TypeVar("T") - - -class Hdf5FileReader(Generic[T]): - """ - HDF5 specific file reader - """ - - def __init__(self, name, target_type): - self._hdf5_file = h5py.File(name, "r") - self._target_type = target_type - self._is_closed = False - - def read(self) -> T: - """ - Read the opened file into a pythonic representation specified by target_type. - Will automatically figure out if target_type is a dict or a regular object - """ - reader = _detect_reader(self._target_type) - return reader(self._hdf5_file) - - def close(self): - """ - Close the underlying HDF file - """ - if not self._is_closed: - self._is_closed = True - del self._hdf5_file - - def __enter__(self): - return self.read() - - def __exit__(self, exc_type, exc_val, exc_tb): - self.close() - - def __del__(self): - self.close() - - -def read_hdf5(name: str, target_type: T): - """ - Open a HDF5 file by name/path - """ - return Hdf5FileReader[T](name, target_type) diff --git a/tests/file_access/test_file_reader.py b/tests/file_access/test_file_reader.py index c731ebacf7b716fccbee594c17c3bebbd5219186..af7f573cb7b13bbf88da5f3846832495aef470a0 100644 --- a/tests/file_access/test_file_reader.py +++ b/tests/file_access/test_file_reader.py @@ -57,24 +57,18 @@ class TestHdf5FileReader(base.TestCase): ) as ds: self.assertEqual(21, len(ds.keys())) item = ds["SST_2022-11-15T14:21:59.000+00:00"] - self.assertFalse( - ( - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - - item.nof_payload_errors - ).any() + self.assertEqual( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + item.nof_payload_errors, ) # double read to check if (cached) value is the same - 
self.assertFalse( - ( - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - - item.nof_payload_errors - ).any() + self.assertEqual( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + item.nof_payload_errors, ) - self.assertFalse( - ( - [12, 12, 12, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - - item.nof_valid_payloads - ).any() + self.assertEqual( + [12, 12, 12, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + item.nof_valid_payloads, ) self.assertIsNone(item.non_existent) self.assertEqual(192, len(item.values)) diff --git a/tests/file_access/test_file_writer.py b/tests/file_access/test_file_writer.py new file mode 100644 index 0000000000000000000000000000000000000000..c055cb8941aae58904860d2381c0e6a2b84be4ac --- /dev/null +++ b/tests/file_access/test_file_writer.py @@ -0,0 +1,155 @@ +# Copyright (C) 2022 ASTRON (Netherlands Institute for Radio Astronomy) +# SPDX-License-Identifier: Apache-2.0 + +from os.path import dirname +from typing import List, Dict + +from numpy import ndarray, array + +from lofar_station_client.file_access import ( + member, + attribute, + create_hdf5, + read_hdf5, + open_hdf5, +) +from tests import base + + +class SimpleSet: + values: ndarray = member() + + +class DataSubSet: + values: List[int] = member() + dict_test_ndarray: Dict[str, ndarray] = member() + dict_test_object: Dict[str, SimpleSet] = member() + + +class DataSet: + observation_station: str = attribute() + observation_source: str = attribute(from_member="sub_set") + nof_payload_errors: List[int] = member() + values: List[List[float]] = member() + sub_set: DataSubSet = member(name="test") + non_existent: DataSubSet = member(optional=True) + + +class TestHdf5FileWriter(base.TestCase): + def test_simple_writing(self): + with create_hdf5(dirname(__file__) + "/test_simple_writing.h5", DataSet) as ds: + ds.observation_station = "CS001" + ds.nof_payload_errors = [1, 2, 3, 4, 5, 6] + ds.values = [[2.0], [3.0], [4.0]] + ds.sub_set = DataSubSet() + ds.sub_set.values = [5, 4, 3, 2] + 
ds.observation_source = "CasA" + + with read_hdf5(dirname(__file__) + "/test_simple_writing.h5", DataSet) as ds: + self.assertEqual("CS001", ds.observation_station) + self.assertEqual([1, 2, 3, 4, 5, 6], ds.nof_payload_errors) + self.assertEqual([[2.0], [3.0], [4.0]], ds.values) + self.assertIsNotNone(ds.sub_set) + self.assertEqual([5, 4, 3, 2], ds.sub_set.values) + self.assertEqual("CasA", ds.observation_source) + + def test_list_writing(self): + with create_hdf5( + dirname(__file__) + "/test_list_writing.h5", DataSubSet + ) as dss: + dss.values = [2, 3, 4, 5] + dss.values.append(1) + + with read_hdf5(dirname(__file__) + "/test_list_writing.h5", DataSubSet) as dss: + self.assertEqual([2, 3, 4, 5, 1], dss.values) + + def test_dict_writing(self): + with create_hdf5( + dirname(__file__) + "/test_dict_writing.h5", Dict[str, ndarray] + ) as d: + d["test_1"] = array([1, 2, 3, 4, 5, 6]) + d["test_2"] = array([6, 5, 4, 1]) + with read_hdf5( + dirname(__file__) + "/test_dict_writing.h5", Dict[str, ndarray] + ) as d: + self.assertFalse(([1, 2, 3, 4, 5, 6] - d["test_1"]).any()) + self.assertFalse(([6, 5, 4, 1] - d["test_2"]).any()) + + def test_dict_altering(self): + with create_hdf5( + dirname(__file__) + "/test_dict_altering.h5", DataSubSet + ) as dss: + dss.dict_test_ndarray = { + "test_1": array([2, 4, 6]), + "test_2": array([1, 3, 5]), + } + dss.dict_test_ndarray["test_3"] = array([9, 8, 7]) + dss.dict_test_ndarray.pop("test_1") + ss = SimpleSet() + ss.values = array([4, 9, 3]) + dss.dict_test_object = {"test_99": ss} + dss.dict_test_object["test_99"].values[0] = 5 + dss.dict_test_object["test_98"] = SimpleSet() + dss.dict_test_object["test_98"].values = array([4, 9, 3]) + with read_hdf5(dirname(__file__) + "/test_dict_altering.h5", DataSubSet) as dss: + self.assertTrue("test_2" in dss.dict_test_ndarray) + self.assertTrue("test_3" in dss.dict_test_ndarray) + self.assertFalse(([1, 3, 5] - dss.dict_test_ndarray["test_2"]).any()) + self.assertFalse(([9, 8, 7] - 
dss.dict_test_ndarray["test_3"]).any()) + self.assertTrue("test_99" in dss.dict_test_object) + self.assertTrue("test_98" in dss.dict_test_object) + self.assertFalse(([5, 9, 3] - dss.dict_test_object["test_99"].values).any()) + self.assertFalse(([4, 9, 3] - dss.dict_test_object["test_98"].values).any()) + + def test_object_access(self): + ds = DataSet() + ds.observation_station = "CS001" + ds.nof_payload_errors = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ds.values = [[1.0]] + ds.sub_set = DataSubSet() + ds.sub_set.values = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ds.observation_source = "CasA" + + self.assertEqual("CS001", ds.observation_station) + self.assertEqual( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], ds.nof_payload_errors + ) + self.assertEqual([[1.0]], ds.values) + self.assertIsNotNone(ds.sub_set) + self.assertEqual( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], ds.sub_set.values + ) + self.assertEqual("CasA", ds.observation_source) + + def test_attach_object(self): + with create_hdf5(dirname(__file__) + "/test_attach_object.h5", DataSet) as ds: + sub_set = DataSubSet() + sub_set.values = [7, 4, 9, 2, 9] + ds.sub_set = sub_set + ds.observation_source = "CasA" + with read_hdf5(dirname(__file__) + "/test_attach_object.h5", DataSet) as ds: + self.assertEqual([7, 4, 9, 2, 9], ds.sub_set.values) + self.assertEqual("CasA", ds.observation_source) + + def test_open_write(self): + with create_hdf5(dirname(__file__) + "/test_open_write.h5", DataSet) as ds: + ds.observation_station = "CS001" + ds.nof_payload_errors = [1, 2, 3, 4, 5, 6] + ds.values = [[2.0], [3.0], [4.0]] + ds.sub_set = DataSubSet() + ds.sub_set.values = [5, 4, 3, 2] + ds.observation_source = "CasA" + + with open_hdf5(dirname(__file__) + "/test_open_write.h5", DataSet) as ds: + ds.nof_payload_errors.append(7) + ds.values.append([5.0]) + ds.observation_source = "ACAS" + ds.sub_set.values = [1, 2, 3] + + with read_hdf5(dirname(__file__) + "/test_open_write.h5", DataSet) 
as ds: + self.assertEqual("CS001", ds.observation_station) + self.assertEqual([1, 2, 3, 4, 5, 6, 7], ds.nof_payload_errors) + self.assertEqual([[2.0], [3.0], [4.0], [5.0]], ds.values) + self.assertIsNotNone(ds.sub_set) + self.assertEqual([1, 2, 3], ds.sub_set.values) + self.assertEqual("ACAS", ds.observation_source) diff --git a/tests/file_access/test_monitored_wrapper.py b/tests/file_access/test_monitored_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..117aaf27db441937e653414dd5201e2d0710d383 --- /dev/null +++ b/tests/file_access/test_monitored_wrapper.py @@ -0,0 +1,35 @@ +from numpy import array + +from lofar_station_client.file_access._monitoring import MonitoredWrapper +from tests import base + + +class TestMonitoredWrapper(base.TestCase): + def test_list(self): + invocations = [] + + def event(a): + invocations.append(f"Invoked with {a}") + + l1 = MonitoredWrapper(event, []) + l1.append(1) + self.assertEqual("Invoked with [1]", invocations[0]) + l1.append(2) + self.assertEqual("Invoked with [1, 2]", invocations[1]) + l1.pop() + self.assertEqual("Invoked with [1]", invocations[2]) + + l2 = MonitoredWrapper(event, [1, 2, 3, 4]) + l2.append(1) + self.assertEqual("Invoked with [1, 2, 3, 4, 1]", invocations[3]) + l2.append(2) + self.assertEqual("Invoked with [1, 2, 3, 4, 1, 2]", invocations[4]) + l2.pop() + self.assertEqual("Invoked with [1, 2, 3, 4, 1]", invocations[5]) + + l2[0] = 99 + self.assertEqual(99, l2[0]) + self.assertEqual("Invoked with [99, 2, 3, 4, 1]", invocations[6]) + + na = MonitoredWrapper(event, array([2, 3, 4])) + self.assertEqual((3,), na.shape) diff --git a/tox.ini b/tox.ini index a358069440d42dfbfe9790a122a5cca328be7acf..b0b1ae1db5b4977e5e3473392a132e1632e5c6da 100644 --- a/tox.ini +++ b/tox.ini @@ -7,6 +7,7 @@ skipsdist = True [testenv] usedevelop = True +package = editable-legacy setenv = LANGUAGE=en_US LC_ALL=en_US.UTF-8