From d87cbcc4ce2b5c78d23f3da10c4e9ef4debb7c1a Mon Sep 17 00:00:00 2001
From: Stefano Di Frischia <stefano.difrischia@inaf.it>
Date: Fri, 8 Sep 2023 09:35:50 +0000
Subject: [PATCH] Resolve L2SS-1494 "Add gzip compression to hdf5"

---
 README.md                                            | 1 +
 VERSION                                              | 2 +-
 lofar_station_client/file_access/hdf/_hdf_writers.py | 6 +++++-
 lofar_station_client/statistics/writer/VERSION       | 2 +-
 tests/statistics/test_writer.py                      | 5 +++++
 5 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 3e3259f..4b2c1f5 100644
--- a/README.md
+++ b/README.md
@@ -130,6 +130,7 @@ tox -e debug tests.requests.test_prometheus
 ```
 
 ## Releasenotes
+- 0.17.0 - Add gzip compression to HDF5 files in statistics writer
 - 0.16.3 - Fix AntennaField in filename
 - 0.16.2 - Fixed MultiStationObservation.all_connected
          - Renamed SDPPacket.upcast to downcast
diff --git a/VERSION b/VERSION
index 7eb3095..c5523bd 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.16.3
+0.17.0
diff --git a/lofar_station_client/file_access/hdf/_hdf_writers.py b/lofar_station_client/file_access/hdf/_hdf_writers.py
index 0349cef..063defb 100644
--- a/lofar_station_client/file_access/hdf/_hdf_writers.py
+++ b/lofar_station_client/file_access/hdf/_hdf_writers.py
@@ -201,7 +201,11 @@ class HdfDataWriter(HdfDataReader, DataWriter):
             _assert_is_dataset(data[key])
             del data[key]
 
-        data.create_dataset(key, data=value)
+        # Use the GZIP filter ("gzip"), available with every installation of HDF5.
+        # compression_opts sets the gzip level: an integer from 0 to 9 (default: 4).
+        # Level 9 is used here: smallest files at the cost of slower writes.
+        # https://docs.h5py.org/en/stable/high/dataset.html#lossless-compression-filters
+        data.create_dataset(key, data=value, compression="gzip", compression_opts=9)
         if target_type is not ndarray and issubclass(target_type, ndarray):
             data_writer = cls(data_writer.file_writer, data[key])
             setattr(value, "_data_writer", data_writer)
diff --git a/lofar_station_client/statistics/writer/VERSION b/lofar_station_client/statistics/writer/VERSION
index 5a2a580..eb49d7c 100644
--- a/lofar_station_client/statistics/writer/VERSION
+++ b/lofar_station_client/statistics/writer/VERSION
@@ -1 +1 @@
-0.6
+0.7
diff --git a/tests/statistics/test_writer.py b/tests/statistics/test_writer.py
index 4333e8b..8597809 100644
--- a/tests/statistics/test_writer.py
+++ b/tests/statistics/test_writer.py
@@ -342,6 +342,11 @@ class TestStatisticsWriterXST(TestStatisticsReaderWriter):
                         "timestamp", dict(f["XST_2021-09-13T13:21:32.000"].attrs)
                     )
 
+                    # the written dataset must report the gzip compression filter
+                    self.assertEqual(
+                        f["XST_2021-09-13T13:21:32.000"].compression, "gzip"
+                    )
+
     def test_xst_multiple_subbands(self):
         with mock.patch.object(entry, "_get_tango_device", self._mock_get_tango_device):
             with TemporaryDirectory() as tmpdir:
-- 
GitLab