Skip to content
Snippets Groups Projects
Commit 58272b84 authored by Hannes Feldt's avatar Hannes Feldt
Browse files

Merge branch 'L2SS-1114-hdf5_reader_wrapper_improvement' into 'main'

L2SS-1114: Add some improvements in how to use the HDF file reader

Closes L2SS-1114

See merge request !25
parents 68df4aa1 aabad873
Branches
Tags 0.11.0
1 merge request!25L2SS-1114: Add some improvements in how to use the HDF file reader
Pipeline #40506 passed
......@@ -66,14 +66,9 @@ these attributes. Therefor `attribute` allows to specify another member in the c
## Read a HDF file
A file can be read using the `Hdf5FileReader` class:
A file can be read using `read_hdf5`:
```python
with read_hdf5('file_name.h5') as f:
data = f.read(Data)
with read_hdf5('file_name.h5', Data) as data:
a = data.first_attr
```
It should always be used in a `with` block to make sure the HDF5 file handles are cleaned up properly.
Currently, the data object read from the file is only usable within the `with` block since the data is lazily loaded.
If needed, eager loading could be added in the future.
......@@ -8,6 +8,6 @@ Contains classes to interact with (hdf5) files
from ._hdf5_attribute_def import attribute
from ._hdf5_member_def import member
from .file_reader import Hdf5FileReader, read_hdf5
from .hdf_file_reader import Hdf5FileReader, read_hdf5
__all__ = ["Hdf5FileReader", "attribute", "member", "read_hdf5"]
......@@ -5,7 +5,7 @@
Contains classes to handle file reading
"""
from typing import TypeVar, Type
from typing import TypeVar, Generic
import h5py
......@@ -14,21 +14,22 @@ from ._hdf5_utils import _detect_reader
T = TypeVar("T")
class Hdf5FileReader:
class Hdf5FileReader(Generic[T]):
"""
HDF5 specific file reader
"""
def __init__(self, name):
def __init__(self, name, target_type):
self._hdf5_file = h5py.File(name, "r")
self._target_type = target_type
self._is_closed = False
def read(self, target_type: Type[T]) -> T:
def read(self) -> T:
"""
Read the opened file into a pythonic representation specified by target_type.
Will automatically figure out if target_type is a dict or a regular object
"""
reader = _detect_reader(target_type)
reader = _detect_reader(self._target_type)
return reader(self._hdf5_file)
def close(self):
......@@ -40,7 +41,7 @@ class Hdf5FileReader:
del self._hdf5_file
def __enter__(self):
return self
return self.read()
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
......@@ -49,8 +50,8 @@ class Hdf5FileReader:
self.close()
def read_hdf5(name: str) -> Hdf5FileReader:
def read_hdf5(name: str, target_type: T):
"""
Open a HDF5 file by name/path
"""
return Hdf5FileReader(name)
return Hdf5FileReader[T](name, target_type)
......@@ -53,9 +53,8 @@ class CalTableDict(Dict[str, Dict[str, ndarray]]):
class TestHdf5FileReader(base.TestCase):
def test_file_reading(self):
with read_hdf5(
dirname(__file__) + "/SST_2022-11-15-14-21-39.h5"
) as file_reader:
ds = file_reader.read(Dict[str, DataSet])
dirname(__file__) + "/SST_2022-11-15-14-21-39.h5", Dict[str, DataSet]
) as ds:
self.assertEqual(21, len(ds.keys()))
item = ds["SST_2022-11-15T14:21:59.000+00:00"]
self.assertFalse(
......@@ -90,8 +89,7 @@ class TestHdf5FileReader(base.TestCase):
self.assertEqual(100, len(item.sub_set.values))
def test_read_attribute(self):
with read_hdf5(dirname(__file__) + "/cal-test.h5") as file_reader:
ds = file_reader.read(AttrDataSet)
with read_hdf5(dirname(__file__) + "/cal-test.h5", AttrDataSet) as ds:
self.assertEqual("test-station", ds.observation_station)
self.assertEqual("dset_attr", ds.test_attr)
self.assertIsNone(ds.observation_station_optional)
......@@ -103,16 +101,14 @@ class TestHdf5FileReader(base.TestCase):
)
def test_read_ndarray(self):
with read_hdf5(dirname(__file__) + "/cal-test.h5") as file_reader:
ds = file_reader.read(AttrDataSet)
with read_hdf5(dirname(__file__) + "/cal-test.h5", AttrDataSet) as ds:
d = ds.calibration_data
self.assertTrue(isinstance(d, ndarray))
self.assertEqual(512, d.shape[0])
self.assertEqual(96, d.shape[1])
def test_read_derived_dict(self):
with read_hdf5(dirname(__file__) + "/cal-test-dict.h5") as file_reader:
ds = file_reader.read(CalTable)
with read_hdf5(dirname(__file__) + "/cal-test-dict.h5", CalTable) as ds:
self.assertEqual(5, len(ds))
self.assertEqual("test-station", ds.observation_station)
ant_2 = ds["ant_2"]
......@@ -122,8 +118,7 @@ class TestHdf5FileReader(base.TestCase):
self.assertEqual("ant_2_y", ant_2.y_attr)
def test_read_derived_double_dict(self):
with read_hdf5(dirname(__file__) + "/cal-test-dict.h5") as file_reader:
ds = file_reader.read(CalTableDict)
with read_hdf5(dirname(__file__) + "/cal-test-dict.h5", CalTableDict) as ds:
self.assertEqual(5, len(ds))
ant_2 = ds["ant_2"]
self.assertTrue("x" in ant_2)
......@@ -141,9 +136,8 @@ class TestHdf5FileReader(base.TestCase):
)
with read_hdf5(
dirname(__file__) + "/SST_2022-11-15-14-21-39.h5"
) as file_reader:
ds = file_reader.read(ObjectDataSet)
dirname(__file__) + "/SST_2022-11-15-14-21-39.h5", ObjectDataSet
) as ds:
self.assertEqual(
["nof_payload_errors", "nof_valid_payloads", "values"],
list(ds.item_1.keys()),
......@@ -161,9 +155,8 @@ class TestHdf5FileReader(base.TestCase):
sub_set: List[int] = member(name="test")
with read_hdf5(
dirname(__file__) + "/SST_2022-11-15-14-21-39.h5"
) as file_reader:
ds = file_reader.read(Dict[str, BrokenDataSet])
dirname(__file__) + "/SST_2022-11-15-14-21-39.h5", Dict[str, BrokenDataSet]
) as ds:
item = ds["SST_2022-11-15T14:21:39.000+00:00"]
with self.assertRaises(TypeError):
_ = item.nof_payload_errors
......@@ -173,9 +166,7 @@ class TestHdf5FileReader(base.TestCase):
_ = item.sub_set
def test_reader_close(self):
file_reader = read_hdf5(dirname(__file__) + "/SST_2022-11-15-14-21-39.h5")
file_reader = read_hdf5(
dirname(__file__) + "/SST_2022-11-15-14-21-39.h5", Dict[str, DataSet]
)
file_reader.close()
with read_hdf5(dirname(__file__) + "/SST_2022-11-15-14-21-39.h5") as f:
f.close()
f.close()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment