diff --git a/lofar_station_client/file_access/README.md b/lofar_station_client/file_access/README.md index 9f5e04b9a32ca655e7f6dd1d5917b240b6cc5c7d..07f7e42201b3deed3ac1c7274ca865403c864a25 100644 --- a/lofar_station_client/file_access/README.md +++ b/lofar_station_client/file_access/README.md @@ -66,14 +66,9 @@ these attributes. Therefor `attribute` allows to specify another member in the c ## Read a HDF file -A file can be read using the `Hdf5FileReader` class: +A file can be read using `read_hdf5`: ```python -with read_hdf5('file_name.h5') as f: - data = f.read(Data) +with read_hdf5('file_name.h5', Data) as data: + a = data.first_attr ``` - -It should always be used in a `with` block to make sure the HDF5 file handles are cleand up properly. - -Currently, the data object read from the file is only usable within the `with` block since the data is lazily loaded. -If needed eager loading could be added in the future. diff --git a/lofar_station_client/file_access/__init__.py b/lofar_station_client/file_access/__init__.py index 8bdfbc149f839a0f248b83f2be9a91f745c22cce..661011d5f68854681f05fc8194dd1da01c8459e9 100644 --- a/lofar_station_client/file_access/__init__.py +++ b/lofar_station_client/file_access/__init__.py @@ -8,6 +8,6 @@ Contains classes to interact with (hdf5) files from ._hdf5_attribute_def import attribute from ._hdf5_member_def import member -from .file_reader import Hdf5FileReader, read_hdf5 +from .hdf_file_reader import Hdf5FileReader, read_hdf5 __all__ = ["Hdf5FileReader", "attribute", "member", "read_hdf5"] diff --git a/lofar_station_client/file_access/file_reader.py b/lofar_station_client/file_access/hdf_file_reader.py similarity index 73% rename from lofar_station_client/file_access/file_reader.py rename to lofar_station_client/file_access/hdf_file_reader.py index 1950b668131b2e98015dd07c36ebba1e7ca9faa0..a98a4ccf1aca44591594e4552a9dbaa1dcdc57f1 100644 --- a/lofar_station_client/file_access/file_reader.py +++ b/lofar_station_client/file_access/hdf_file_reader.py @@ -5,7 +5,7 @@ Contains classes to handle file reading """ -from typing import TypeVar, Type +from typing import TypeVar, Generic import h5py @@ -14,21 +14,22 @@ from ._hdf5_utils import _detect_reader T = TypeVar("T") -class Hdf5FileReader: +class Hdf5FileReader(Generic[T]): """ HDF5 specific file reader """ - def __init__(self, name): + def __init__(self, name, target_type): self._hdf5_file = h5py.File(name, "r") + self._target_type = target_type self._is_closed = False - def read(self, target_type: Type[T]) -> T: + def read(self) -> T: """ Read the opened file into a pythonic representation specified by target_type. Will automatically figure out if target_type is a dict or a regular object """ - reader = _detect_reader(target_type) + reader = _detect_reader(self._target_type) return reader(self._hdf5_file) def close(self): @@ -40,7 +41,7 @@ class Hdf5FileReader: del self._hdf5_file def __enter__(self): - return self + return self.read() def __exit__(self, exc_type, exc_val, exc_tb): self.close() @@ -49,8 +50,8 @@ class Hdf5FileReader: self.close() -def read_hdf5(name: str) -> Hdf5FileReader: +def read_hdf5(name: str, target_type: T): """ Open a HDF5 file by name/path """ - return Hdf5FileReader(name) + return Hdf5FileReader[T](name, target_type) diff --git a/tests/file_access/test_file_reader.py b/tests/file_access/test_file_reader.py index e7f5f2443cbcf61fbd6d524f8973f20632728436..c731ebacf7b716fccbee594c17c3bebbd5219186 100644 --- a/tests/file_access/test_file_reader.py +++ b/tests/file_access/test_file_reader.py @@ -53,9 +53,8 @@ class CalTableDict(Dict[str, Dict[str, ndarray]]): class TestHdf5FileReader(base.TestCase): def test_file_reading(self): with read_hdf5( - dirname(__file__) + "/SST_2022-11-15-14-21-39.h5" - ) as file_reader: - ds = file_reader.read(Dict[str, DataSet]) + dirname(__file__) + "/SST_2022-11-15-14-21-39.h5", Dict[str, DataSet] + ) as ds: self.assertEqual(21, len(ds.keys())) item = ds["SST_2022-11-15T14:21:59.000+00:00"] self.assertFalse( @@ -90,8 +89,7 @@ class TestHdf5FileReader(base.TestCase): self.assertEqual(100, len(item.sub_set.values)) def test_read_attribute(self): - with read_hdf5(dirname(__file__) + "/cal-test.h5") as file_reader: - ds = file_reader.read(AttrDataSet) + with read_hdf5(dirname(__file__) + "/cal-test.h5", AttrDataSet) as ds: self.assertEqual("test-station", ds.observation_station) self.assertEqual("dset_attr", ds.test_attr) self.assertIsNone(ds.observation_station_optional) @@ -103,16 +101,14 @@ class TestHdf5FileReader(base.TestCase): ) def test_read_ndarray(self): - with read_hdf5(dirname(__file__) + "/cal-test.h5") as file_reader: - ds = file_reader.read(AttrDataSet) + with read_hdf5(dirname(__file__) + "/cal-test.h5", AttrDataSet) as ds: d = ds.calibration_data self.assertTrue(isinstance(d, ndarray)) self.assertEqual(512, d.shape[0]) self.assertEqual(96, d.shape[1]) def test_read_derived_dict(self): - with read_hdf5(dirname(__file__) + "/cal-test-dict.h5") as file_reader: - ds = file_reader.read(CalTable) + with read_hdf5(dirname(__file__) + "/cal-test-dict.h5", CalTable) as ds: self.assertEqual(5, len(ds)) self.assertEqual("test-station", ds.observation_station) ant_2 = ds["ant_2"] @@ -122,8 +118,7 @@ class TestHdf5FileReader(base.TestCase): self.assertEqual("ant_2_y", ant_2.y_attr) def test_read_derived_double_dict(self): - with read_hdf5(dirname(__file__) + "/cal-test-dict.h5") as file_reader: - ds = file_reader.read(CalTableDict) + with read_hdf5(dirname(__file__) + "/cal-test-dict.h5", CalTableDict) as ds: self.assertEqual(5, len(ds)) ant_2 = ds["ant_2"] self.assertTrue("x" in ant_2) @@ -141,9 +136,8 @@ class TestHdf5FileReader(base.TestCase): ) with read_hdf5( - dirname(__file__) + "/SST_2022-11-15-14-21-39.h5" - ) as file_reader: - ds = file_reader.read(ObjectDataSet) + dirname(__file__) + "/SST_2022-11-15-14-21-39.h5", ObjectDataSet + ) as ds: self.assertEqual( ["nof_payload_errors", "nof_valid_payloads", "values"], list(ds.item_1.keys()), @@ -161,9 +155,8 @@ class TestHdf5FileReader(base.TestCase): sub_set: List[int] = member(name="test") with read_hdf5( - dirname(__file__) + "/SST_2022-11-15-14-21-39.h5" - ) as file_reader: - ds = file_reader.read(Dict[str, BrokenDataSet]) + dirname(__file__) + "/SST_2022-11-15-14-21-39.h5", Dict[str, BrokenDataSet] + ) as ds: item = ds["SST_2022-11-15T14:21:39.000+00:00"] with self.assertRaises(TypeError): _ = item.nof_payload_errors @@ -173,9 +166,7 @@ class TestHdf5FileReader(base.TestCase): _ = item.sub_set def test_reader_close(self): - file_reader = read_hdf5(dirname(__file__) + "/SST_2022-11-15-14-21-39.h5") + file_reader = read_hdf5( + dirname(__file__) + "/SST_2022-11-15-14-21-39.h5", Dict[str, DataSet] + ) file_reader.close() - - with read_hdf5(dirname(__file__) + "/SST_2022-11-15-14-21-39.h5") as f: - f.close() - f.close()