Skip to content
Snippets Groups Projects
Commit 58272b84 authored by Hannes Feldt's avatar Hannes Feldt
Browse files

Merge branch 'L2SS-1114-hdf5_reader_wrapper_improvement' into 'main'

L2SS-1114: Add some improvements in how to use the HDF file reader

Closes L2SS-1114

See merge request !25
parents 68df4aa1 aabad873
No related branches found
No related tags found
1 merge request: !25 L2SS-1114: Add some improvements in how to use the HDF file reader
Pipeline #40506 passed
...@@ -66,14 +66,9 @@ these attributes. Therefore `attribute` allows to specify another member in the c ...@@ -66,14 +66,9 @@ these attributes. Therefore `attribute` allows to specify another member in the c
## Read a HDF file ## Read a HDF file
A file can be read using the `Hdf5FileReader` class: A file can be read using `read_hdf5`:
```python ```python
with read_hdf5('file_name.h5') as f: with read_hdf5('file_name.h5', Data) as data:
data = f.read(Data) a = data.first_attr
``` ```
It should always be used in a `with` block to make sure the HDF5 file handles are cleaned up properly.
Currently, the data object read from the file is only usable within the `with` block since the data is lazily loaded.
If needed, eager loading could be added in the future.
...@@ -8,6 +8,6 @@ Contains classes to interact with (hdf5) files ...@@ -8,6 +8,6 @@ Contains classes to interact with (hdf5) files
from ._hdf5_attribute_def import attribute from ._hdf5_attribute_def import attribute
from ._hdf5_member_def import member from ._hdf5_member_def import member
from .file_reader import Hdf5FileReader, read_hdf5 from .hdf_file_reader import Hdf5FileReader, read_hdf5
__all__ = ["Hdf5FileReader", "attribute", "member", "read_hdf5"] __all__ = ["Hdf5FileReader", "attribute", "member", "read_hdf5"]
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
Contains classes to handle file reading Contains classes to handle file reading
""" """
from typing import TypeVar, Type from typing import TypeVar, Generic
import h5py import h5py
...@@ -14,21 +14,22 @@ from ._hdf5_utils import _detect_reader ...@@ -14,21 +14,22 @@ from ._hdf5_utils import _detect_reader
T = TypeVar("T") T = TypeVar("T")
class Hdf5FileReader: class Hdf5FileReader(Generic[T]):
""" """
HDF5 specific file reader HDF5 specific file reader
""" """
def __init__(self, name): def __init__(self, name, target_type):
self._hdf5_file = h5py.File(name, "r") self._hdf5_file = h5py.File(name, "r")
self._target_type = target_type
self._is_closed = False self._is_closed = False
def read(self, target_type: Type[T]) -> T: def read(self) -> T:
""" """
Read the opened file into a pythonic representation specified by target_type. Read the opened file into a pythonic representation specified by target_type.
Will automatically figure out if target_type is a dict or a regular object Will automatically figure out if target_type is a dict or a regular object
""" """
reader = _detect_reader(target_type) reader = _detect_reader(self._target_type)
return reader(self._hdf5_file) return reader(self._hdf5_file)
def close(self): def close(self):
...@@ -40,7 +41,7 @@ class Hdf5FileReader: ...@@ -40,7 +41,7 @@ class Hdf5FileReader:
del self._hdf5_file del self._hdf5_file
def __enter__(self): def __enter__(self):
return self return self.read()
def __exit__(self, exc_type, exc_val, exc_tb): def __exit__(self, exc_type, exc_val, exc_tb):
self.close() self.close()
...@@ -49,8 +50,8 @@ class Hdf5FileReader: ...@@ -49,8 +50,8 @@ class Hdf5FileReader:
self.close() self.close()
def read_hdf5(name: str) -> Hdf5FileReader: def read_hdf5(name: str, target_type: T):
""" """
Open a HDF5 file by name/path Open a HDF5 file by name/path
""" """
return Hdf5FileReader(name) return Hdf5FileReader[T](name, target_type)
...@@ -53,9 +53,8 @@ class CalTableDict(Dict[str, Dict[str, ndarray]]): ...@@ -53,9 +53,8 @@ class CalTableDict(Dict[str, Dict[str, ndarray]]):
class TestHdf5FileReader(base.TestCase): class TestHdf5FileReader(base.TestCase):
def test_file_reading(self): def test_file_reading(self):
with read_hdf5( with read_hdf5(
dirname(__file__) + "/SST_2022-11-15-14-21-39.h5" dirname(__file__) + "/SST_2022-11-15-14-21-39.h5", Dict[str, DataSet]
) as file_reader: ) as ds:
ds = file_reader.read(Dict[str, DataSet])
self.assertEqual(21, len(ds.keys())) self.assertEqual(21, len(ds.keys()))
item = ds["SST_2022-11-15T14:21:59.000+00:00"] item = ds["SST_2022-11-15T14:21:59.000+00:00"]
self.assertFalse( self.assertFalse(
...@@ -90,8 +89,7 @@ class TestHdf5FileReader(base.TestCase): ...@@ -90,8 +89,7 @@ class TestHdf5FileReader(base.TestCase):
self.assertEqual(100, len(item.sub_set.values)) self.assertEqual(100, len(item.sub_set.values))
def test_read_attribute(self): def test_read_attribute(self):
with read_hdf5(dirname(__file__) + "/cal-test.h5") as file_reader: with read_hdf5(dirname(__file__) + "/cal-test.h5", AttrDataSet) as ds:
ds = file_reader.read(AttrDataSet)
self.assertEqual("test-station", ds.observation_station) self.assertEqual("test-station", ds.observation_station)
self.assertEqual("dset_attr", ds.test_attr) self.assertEqual("dset_attr", ds.test_attr)
self.assertIsNone(ds.observation_station_optional) self.assertIsNone(ds.observation_station_optional)
...@@ -103,16 +101,14 @@ class TestHdf5FileReader(base.TestCase): ...@@ -103,16 +101,14 @@ class TestHdf5FileReader(base.TestCase):
) )
def test_read_ndarray(self): def test_read_ndarray(self):
with read_hdf5(dirname(__file__) + "/cal-test.h5") as file_reader: with read_hdf5(dirname(__file__) + "/cal-test.h5", AttrDataSet) as ds:
ds = file_reader.read(AttrDataSet)
d = ds.calibration_data d = ds.calibration_data
self.assertTrue(isinstance(d, ndarray)) self.assertTrue(isinstance(d, ndarray))
self.assertEqual(512, d.shape[0]) self.assertEqual(512, d.shape[0])
self.assertEqual(96, d.shape[1]) self.assertEqual(96, d.shape[1])
def test_read_derived_dict(self): def test_read_derived_dict(self):
with read_hdf5(dirname(__file__) + "/cal-test-dict.h5") as file_reader: with read_hdf5(dirname(__file__) + "/cal-test-dict.h5", CalTable) as ds:
ds = file_reader.read(CalTable)
self.assertEqual(5, len(ds)) self.assertEqual(5, len(ds))
self.assertEqual("test-station", ds.observation_station) self.assertEqual("test-station", ds.observation_station)
ant_2 = ds["ant_2"] ant_2 = ds["ant_2"]
...@@ -122,8 +118,7 @@ class TestHdf5FileReader(base.TestCase): ...@@ -122,8 +118,7 @@ class TestHdf5FileReader(base.TestCase):
self.assertEqual("ant_2_y", ant_2.y_attr) self.assertEqual("ant_2_y", ant_2.y_attr)
def test_read_derived_double_dict(self): def test_read_derived_double_dict(self):
with read_hdf5(dirname(__file__) + "/cal-test-dict.h5") as file_reader: with read_hdf5(dirname(__file__) + "/cal-test-dict.h5", CalTableDict) as ds:
ds = file_reader.read(CalTableDict)
self.assertEqual(5, len(ds)) self.assertEqual(5, len(ds))
ant_2 = ds["ant_2"] ant_2 = ds["ant_2"]
self.assertTrue("x" in ant_2) self.assertTrue("x" in ant_2)
...@@ -141,9 +136,8 @@ class TestHdf5FileReader(base.TestCase): ...@@ -141,9 +136,8 @@ class TestHdf5FileReader(base.TestCase):
) )
with read_hdf5( with read_hdf5(
dirname(__file__) + "/SST_2022-11-15-14-21-39.h5" dirname(__file__) + "/SST_2022-11-15-14-21-39.h5", ObjectDataSet
) as file_reader: ) as ds:
ds = file_reader.read(ObjectDataSet)
self.assertEqual( self.assertEqual(
["nof_payload_errors", "nof_valid_payloads", "values"], ["nof_payload_errors", "nof_valid_payloads", "values"],
list(ds.item_1.keys()), list(ds.item_1.keys()),
...@@ -161,9 +155,8 @@ class TestHdf5FileReader(base.TestCase): ...@@ -161,9 +155,8 @@ class TestHdf5FileReader(base.TestCase):
sub_set: List[int] = member(name="test") sub_set: List[int] = member(name="test")
with read_hdf5( with read_hdf5(
dirname(__file__) + "/SST_2022-11-15-14-21-39.h5" dirname(__file__) + "/SST_2022-11-15-14-21-39.h5", Dict[str, BrokenDataSet]
) as file_reader: ) as ds:
ds = file_reader.read(Dict[str, BrokenDataSet])
item = ds["SST_2022-11-15T14:21:39.000+00:00"] item = ds["SST_2022-11-15T14:21:39.000+00:00"]
with self.assertRaises(TypeError): with self.assertRaises(TypeError):
_ = item.nof_payload_errors _ = item.nof_payload_errors
...@@ -173,9 +166,7 @@ class TestHdf5FileReader(base.TestCase): ...@@ -173,9 +166,7 @@ class TestHdf5FileReader(base.TestCase):
_ = item.sub_set _ = item.sub_set
def test_reader_close(self): def test_reader_close(self):
file_reader = read_hdf5(dirname(__file__) + "/SST_2022-11-15-14-21-39.h5") file_reader = read_hdf5(
dirname(__file__) + "/SST_2022-11-15-14-21-39.h5", Dict[str, DataSet]
)
file_reader.close() file_reader.close()
with read_hdf5(dirname(__file__) + "/SST_2022-11-15-14-21-39.h5") as f:
f.close()
f.close()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment