Skip to content
Snippets Groups Projects
Commit 4d842480 authored by Mattia Mancini's avatar Mattia Mancini
Browse files

SW-43: first implementation of the averaging routines

parent 849972ea
No related branches found
No related tags found
1 merge request!44Merge back holography to master
......@@ -36,6 +36,7 @@ CAL/CalibrationCommon/test/t_holography_observation.sh -text
CAL/CalibrationProcessing/CMakeLists.txt -text
CAL/CalibrationProcessing/bin/CMakeLists.txt -text
CAL/CalibrationProcessing/lib/CMakeLists.txt -text
CAL/CalibrationProcessing/lib/__init__.py -text
CAL/CalibrationProcessing/lib/process_holography_dataset.py -text
CAL/CalibrationProcessing/lib/processing.py -text
CAL/CalibrationProcessing/test/CMakeLists.txt -text
......
......@@ -18,3 +18,4 @@
# $Id$
lofar_add_package(CalibrationCommon)
lofar_add_package(CalibrationProcessing)
......@@ -20,6 +20,6 @@
python_install(
__init__.py
processing.py
DESTINATION lofar/calibration/processing)
DESTINATION lofar/calibration)
lofar_add_bin_script(mshologextract.py RENAME mshologextract)
\ No newline at end of file
lofar_add_bin_script(process_holography_dataset.py RENAME process_holography_dataset)
\ No newline at end of file
import numpy
from lofar.calibration.common.datacontainers import HolographyDataset
def generalized_standard_deviation(data_array):
    """
    Compute the standard deviation following the plain mathematical definition
    sqrt(mean((x - mean(x)) ** 2)).

    Note that, unlike numpy.std, no absolute value is taken, so for complex
    input the result is itself complex.

    :param data_array: input array
    :type data_array: numpy.ndarray
    :return: the standard deviation
    :rtype: data_array.dtype
    """
    deviations = data_array - numpy.mean(data_array)
    variance = numpy.mean(deviations * deviations)
    return numpy.sqrt(variance)
def average_values_by_sample(data_array, sample_width):
    """
    Average the values present in the data_array over consecutive windows of
    sample_width samples.

    The last window may hold fewer than sample_width samples when the array
    length is not an exact multiple of sample_width.

    :param data_array: input array
    :type data_array: numpy.ndarray
    :param sample_width: window width in samples
    :type sample_width: int
    :return: a structured array with fields 'mean' and 'std', one entry per window
    :rtype: numpy.ndarray
    """
    new_array_size = _round_up_size(len(data_array), sample_width)
    result = numpy.zeros(new_array_size, dtype=[('mean', data_array.dtype),
                                                ('std', data_array.dtype)])
    # Slicing clamps at the end of the array, so the (possibly shorter) last
    # window needs no special casing; an empty input simply yields an empty
    # result instead of the IndexError the previous implementation raised.
    for i in range(new_array_size):
        data_array_view = data_array[i * sample_width: (i + 1) * sample_width]
        result['mean'][i] = numpy.mean(data_array_view)
        result['std'][i] = generalized_standard_deviation(data_array_view)
    return result
def average_datatable_by_sample(data_table_in, sample_width):
    """
    Average every field of a structured datatable over windows of
    sample_width samples.

    :param data_table_in: the datatable to be averaged
    :type data_table_in: numpy.ndarray
    :param sample_width: window width in samples
    :return: the array with the averaged values and the array with the standard deviations
    :rtype: dict(str:numpy.ndarray)
    """
    n_windows = _round_up_size(len(data_table_in), sample_width)
    table_dtype = data_table_in.dtype
    # One output row per window, preserving the field layout of the input.
    means = numpy.zeros((n_windows,), dtype=table_dtype)
    stds = numpy.zeros((n_windows,), dtype=table_dtype)
    for name in table_dtype.names:
        per_field = average_values_by_sample(data_table_in[name], sample_width)
        means[name] = per_field['mean']
        stds[name] = per_field['std']
    return {'mean': means, 'std': stds}
def average_dataset_by_sample(input_path, sample_width):
    """
    Average the dataset with a given sample window width.

    :param input_path: path to the holography dataset
    :type input_path: str
    :param sample_width: window width in samples
    :type sample_width: int
    :return: the averaged dataset structured in a dict of dicts which keys are
        the reference station name
            the frequency
                the beamlet
                    mean : -> containing the averaged results
                    std : -> containing the standard deviation of the averaged results
    :rtype: dict(str:dict(str:dict(str:dict(str:numpy.ndarray))))
    """
    dataset = HolographyDataset.load_from_file(input_path)
    out_data = {}
    for reference_station, data_per_reference_station in dataset.data.items():
        out_data_per_reference_station = out_data.setdefault(reference_station, dict())
        for frequency_string, data_per_frequency in data_per_reference_station.items():
            outdata_per_beamlet = out_data_per_reference_station.setdefault(frequency_string,
                                                                            dict())
            for beamlet, data_per_beamlet in data_per_frequency.items():
                averaged_datatable = average_datatable_by_sample(data_per_beamlet, sample_width)
                # Store the result keyed by beamlet, as the docstring promises;
                # previously every beamlet overwrote the same 'mean'/'std' keys
                # so only the last beamlet survived.
                outdata_per_beamlet[beamlet] = dict(mean=averaged_datatable['mean'],
                                                    std=averaged_datatable['std'])
    return out_data
def _round_up_size(array_size, window_size):
"""
Round up the array size given a window size.
If array_size % window_size = 0 it returns array_size/window_size
otherwise it returns array_size/window_size + 1
:param array_size: size of the input array
:param window_size: size of the window to impose over the input array
:return: the new array size after averaging with the given window size
"""
# python does not enforce the sample_width to be an integer and a value between 0 and 1
# will create an oversampling of the array
assert window_size >= 1
new_array_size = int(array_size / window_size)
# add one if the len of the data_array is not divisible by the sample_width
new_array_size += 1 if array_size % window_size else 0
return new_array_size
import logging
import unittest
from lofar.calibration.common.datacontainers import HolographyDataset
from lofar.calibration.processing import average_dataset_by_sample, average_values_by_sample
from glob import glob
logger = logging.getLogger('t_processing')
# NOTE(review): this import belongs in the grouped import block at the top of
# the file (PEP 8); kept in place here to avoid reordering in a doc-only pass.
import numpy

# READ doc/Holography_Data_Set.md! It contains the location from which the
# test data must be downloaded and this file generated.
# Absolute path: the test requires this fixture to be present on the test host.
SAMPLE_DATASET = '/data/test/HolographyObservation/CS001HBA0.hdf5'
class TestProcessing(unittest.TestCase):
    """Unit tests for the holography averaging routines."""

    def test_average_array_by_sample_real_values(self):
        """
        Test if specifying the number of samples the averaging produces
        meaningful results for a real valued array.
        """
        # NOTE: previously this method was accidentally nested inside an empty
        # test_average_dataset_by_sample(), so it never ran as a test.
        test_data = numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=float)
        expected_result_mean = numpy.array([1.5, 3.5, 5.5, 7.5, 9.5], dtype=float)
        expected_result_std = numpy.array([0.5, 0.5, 0.5, 0.5, 0.5], dtype=float)

        result = average_values_by_sample(test_data, 2)

        numpy.testing.assert_almost_equal(expected_result_mean, result['mean'])
        numpy.testing.assert_almost_equal(expected_result_std, result['std'])

    def test_average_array_by_sample_complex_values(self):
        """
        Test if specifying the number of samples the averaging produces
        meaningful results for a complex valued array.
        """
        test_data = numpy.array([1 + 1.j, 2 + 2.j, 3 + 3.j, 4 + 4.j, 5 + 5.j,
                                 6 + 6.j, 7 + 7.j, 8 + 8.j, 9 + 9.j, 10 + 10.j],
                                dtype=complex)
        expected_result_mean = numpy.array([1.5 + 1.5j, 3.5 + 3.5j,
                                            5.5 + 5.5j, 7.5 + 7.5j, 9.5 + 9.5j],
                                           dtype=complex)
        expected_result_std = numpy.array([0.5 + 0.5j,
                                           0.5 + 0.5j,
                                           0.5 + 0.5j,
                                           0.5 + 0.5j,
                                           0.5 + 0.5j], dtype=complex)

        result = average_values_by_sample(test_data, 2)

        numpy.testing.assert_almost_equal(result['mean'], expected_result_mean)
        numpy.testing.assert_almost_equal(result['std'], expected_result_std)

    def test_average_dataset_by_sample(self):
        """
        Smoke test: averaging a full holography dataset from disk returns a
        non-empty result.  Requires SAMPLE_DATASET to be present (see the
        note at the top of this file).
        """
        # The previous call passed a spurious extra 'unused' argument and
        # dumped the result with a Python 2 print statement.
        result = average_dataset_by_sample(SAMPLE_DATASET, 2)
        logger.debug('averaged dataset: %s', result)
        self.assertTrue(result)
if __name__ == '__main__':
    # Configure a simple "logger-name : message" format before handing
    # control to unittest's test runner.
    logging.basicConfig(format='%(name)s : %(message)s')
    unittest.main()
\ No newline at end of file
......@@ -19,4 +19,4 @@
# Run the unit test
source python-coverage.sh
python_coverage_test "*CAL*" t_processing.py
python_coverage_test "*Processing*" t_processing.py
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment